├── .gitignore
├── ChangeLog.md
├── Harpy.hs
├── Harpy
    ├── Call.hs
    ├── CodeGenMonad.hs
    ├── X86Assembler.hs
    ├── X86CGCombinators.hs
    ├── X86CodeGen.hs
    └── X86Disassembler.hs
├── LICENSE
├── Makefile
├── NEWS
├── README.md
├── Setup.hs
├── doc
    ├── Makefile
    ├── larger-tutorial.lhs
    └── tutorial.lhs
├── examples
    └── evaluator
    │   ├── ArithParser.hs
    │   ├── ArithTypes.hs
    │   └── Evaluator.hs
└── harpy.cabal


/.gitignore:
--------------------------------------------------------------------------------
 1 | dist
 2 | cabal-dev
 3 | *.o
 4 | *.hi
 5 | *.chi
 6 | *.chs.h
 7 | *.dyn_o
 8 | *.dyn_hi
 9 | .virtualenv
10 | .hpc
11 | .hsenv
12 | .cabal-sandbox/
13 | cabal.sandbox.config
14 | *.prof
15 | *.aux
16 | *.hp
17 | 


--------------------------------------------------------------------------------
/ChangeLog.md:
--------------------------------------------------------------------------------
 1 | ## 0.6.0.3
 2 | 
 3 | ## 0.6.0.2
 4 | 
 5 | * Update ChangeLog.
 6 | 
 7 | ## 0.6.0.1
 8 | 
 9 | * Updated harpy.cabal for new links to Github.
10 | 
11 | ## 0.6.0.0
12 | 
13 | * Re-licensed under BSD3
14 | * Updated to compile in GHC 7.8.3
15 | 


--------------------------------------------------------------------------------
/Harpy.hs:
--------------------------------------------------------------------------------
 1 | --------------------------------------------------------------------------
 2 | -- |
 3 | -- Module:      Harpy
 4 | -- Copyright:   (c) 2006-2015 Martin Grabmueller and Dirk Kleeblatt
 5 | -- License:     BSD3
 6 | --
 7 | -- Maintainer:  martin@grabmueller.de
 8 | -- Stability:   provisional
 9 | -- Portability: portable
10 | --
11 | -- Harpy is a library for run-time code generation of x86 machine code.
12 | --
13 | -- This is a convenience module which re-exports the modules which are
14 | -- essential for using Harpy.
15 | ----------------------------------------------------------------------------
16 | module Harpy(module Harpy.CodeGenMonad,
17 |              module Harpy.Call,
18 |              module Harpy.X86Assembler,
19 |              module Control.Monad.Trans) where
20 | 
21 | import Harpy.CodeGenMonad
22 | import Harpy.Call
23 | import Harpy.X86Assembler
24 | import Control.Monad.Trans
25 | 


--------------------------------------------------------------------------------
/Harpy/Call.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS -cpp #-}
 2 | 
 3 | --------------------------------------------------------------------------
 4 | -- |
 5 | -- Module      :  Harpy.Call
 6 | -- Copyright   :  (c) 2006-2015 Martin Grabmueller and Dirk Kleeblatt
 7 | -- License     :  BSD3
 8 | --
 9 | -- Maintainer  :  martin@grabmueller.de
10 | -- Stability   :  provisional
11 | -- Portability :  non-portable
12 | --
13 | -- Predefined call stubs for run-time generated code.
14 | --------------------------------------------------------------------------
15 | 
16 | module Harpy.Call where
17 | 
18 | import Harpy.CodeGenMonad
19 | 
20 | import Data.Word
21 | import Foreign.Ptr
22 | import Foreign.C
23 | 
24 | #ifndef __HADDOCK__
25 | 
26 | $(callDecl "callAsVoid" [t|Word32 -> IO ()|])
27 | -- $(callDecl "callAsWord32ToWord32" [t|Word32 -> Word32|])
28 | -- $(callDecl "callAs7PtrToVoid" [t|forall a b c d e f g . Ptr a -> Ptr b -> Ptr c -> Ptr d -> Ptr e -> Ptr f -> Ptr g -> () |])
29 | 
30 | #else
31 | 
32 | callAsVoid :: Word32 -> CodeGen e s ()
33 | callAsWord32ToWord32 :: Word32 -> CodeGen e s Word32
34 | callAs7PtrToVoid :: forall a b c d e f g e' s'. Ptr a -> Ptr b -> Ptr c -> Ptr d -> Ptr e -> Ptr f -> Ptr g -> CodeGen e' s' ()
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/Harpy/CodeGenMonad.hs:
--------------------------------------------------------------------------------
  1 | {-# OPTIONS -cpp #-}
  2 | 
  3 | --------------------------------------------------------------------------
  4 | -- |
  5 | -- Module:      Harpy.CodeGenMonad
  6 | -- Copyright:   (c) 2006-20015 Martin Grabmueller and Dirk Kleeblatt
  7 | -- License:     BSD3
  8 | --
  9 | -- Maintainer:  martin@grabmueller.de
 10 | -- Stability:   provisional
 11 | -- Portability: portable (but generated code non-portable)
 12 | --
 13 | -- Monad for generating x86 machine code at runtime.
 14 | --
 15 | -- This is a combined reader-state-exception monad which handles all
 16 | -- the details of handling code buffers, emitting binary data,
 17 | -- relocation etc.
 18 | --
 19 | -- All the code generation functions in module "Harpy.X86CodeGen" live
 20 | -- in this monad and use its error reporting facilities as well as the
 21 | -- internal state maintained by the monad.
 22 | --
 23 | -- The library user can pass a user environment and user state through
 24 | -- the monad.  This state is independent from the internal state and
 25 | -- may be used by higher-level code generation libraries to maintain
 26 | -- their own state across code generation operations.
 27 | -- --------------------------------------------------------------------------
 28 | 
 29 | module Harpy.CodeGenMonad(
 30 |     -- * Types
 31 |           CodeGen,
 32 |           ErrMsg,
 33 |           RelocKind(..),
 34 |           Reloc,
 35 |           Label,
 36 |           FixupKind(..),
 37 |           CodeGenConfig(..),
 38 |           firstBuffer,
 39 |           defaultCodeGenConfig,
 40 |     -- * Functions
 41 |     -- ** General code generator monad operations
 42 |           failCodeGen,
 43 |     -- ** Accessing code generation internals
 44 |           getEntryPoint,
 45 |           getCodeOffset,
 46 |           getBasePtr,
 47 |           getCodeBufferList,
 48 |     -- ** Access to user state and environment
 49 |           setState,
 50 |           getState,
 51 |           getEnv,
 52 |           withEnv,
 53 |     -- ** Label management
 54 |           newLabel,
 55 |           newNamedLabel,
 56 |           setLabel,
 57 |           defineLabel,
 58 |           (@@),
 59 |           emitFixup,
 60 |           labelAddress,
 61 |           emitRelocInfo,
 62 |     -- ** Code emission
 63 |           emit8,
 64 |           emit8At,
 65 |           peek8At,
 66 |           emit32,
 67 |           emit32At,
 68 |           checkBufferSize,
 69 |           ensureBufferSize,
 70 |     -- ** Executing code generation
 71 |           runCodeGen,
 72 |           runCodeGenWithConfig,
 73 |     -- ** Calling generated functions
 74 |           callDecl,
 75 |     -- ** Interface to disassembler
 76 |           disassemble
 77 |     ) where
 78 | 
 79 | import Prelude hiding ((<>))
 80 | import qualified Harpy.X86Disassembler as Dis
 81 | 
 82 | import Control.Applicative
 83 | import Control.Monad
 84 | 
 85 | import Text.PrettyPrint.HughesPJ
 86 | 
 87 | import Numeric
 88 | 
 89 | import Data.List
 90 | import qualified Data.Map as Map
 91 | import Foreign
 92 | import Foreign.C.Types
 93 | import System.IO
 94 | 
 95 | import Control.Monad.Trans
 96 | 
 97 | import Language.Haskell.TH.Syntax
 98 | 
 99 | 
100 | -- | An error message produced by a code generation operation.
101 | type ErrMsg = Doc
102 | 
103 | -- | The code generation monad, a combined reader-state-exception
104 | -- monad.
105 | newtype CodeGen e s a = CodeGen ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
106 | 
107 | -- | Configuration of the code generator.  There are currently two
108 | -- configuration options.  The first is the number fo bytes to use for
109 | -- allocating code buffers (the first as well as additional buffers
110 | -- created in calls to 'ensureBufferSize'.  The second allows to pass
111 | -- in a pre-allocated code buffer and its size.  When this option is
112 | -- used, Harpy does not perform any code buffer resizing (calls to
113 | -- 'ensureBufferSize' will be equivalent to calls to
114 | -- 'checkBufferSize').
115 | data CodeGenConfig = CodeGenConfig {
116 |       codeBufferSize   :: Int,                   -- ^ Size of individual code buffer blocks.
117 |       customCodeBuffer :: Maybe (Ptr Word8, Int) -- ^ Code buffer passed in.
118 |     }
119 | 
120 | -- | Internal state of the code generator
121 | data CodeGenState = CodeGenState {
122 |       buffer        :: Ptr Word8,                    -- ^ Pointer to current code buffer.
123 |       bufferList    :: [(Ptr Word8, Int)],           -- ^ List of all other code buffers.
124 |       firstBuffer   :: Ptr Word8,                    -- ^ Pointer to first buffer.
125 |       bufferOfs     :: Int,                          -- ^ Current offset into buffer where next instruction will be stored.
126 |       bufferSize    :: Int,                          -- ^ Size of current buffer.
127 |       relocEntries  :: [Reloc],                      -- ^ List of all emitted relocation entries.
128 |       nextLabel     :: Int,                          -- ^ Counter for generating labels.
129 |       definedLabels :: Map.Map Int (Ptr Word8, Int, String), -- ^ Map of already defined labels.
130 |       pendingFixups :: Map.Map Int [FixupEntry],     -- ^ Map of labels which have been referenced, but not defined.
131 |       config        :: CodeGenConfig                 -- ^ Configuration record.
132 |     }
133 | 
134 | data FixupEntry = FixupEntry {
135 |       fueBuffer :: Ptr Word8,
136 |       fueOfs    :: Int,
137 |       fueKind   :: FixupKind
138 |     }
139 | 
140 | -- | Kind of a fixup entry.  When a label is emitted with
141 | -- 'defineLabel', all prior references to this label must be fixed
142 | -- up.  This data type tells how to perform the fixup operation.
143 | data FixupKind = Fixup8          -- ^ 8-bit relative reference
144 |                | Fixup16         -- ^ 16-bit relative reference
145 |                | Fixup32         -- ^ 32-bit relative reference
146 |                | Fixup32Absolute -- ^ 32-bit absolute reference
147 |                deriving (Show)
148 | 
149 | data CodeGenEnv = CodeGenEnv { tailContext :: Bool }
150 |    deriving (Show)
151 | 
152 | -- | Kind of relocation, for example PC-relative
153 | data RelocKind = RelocPCRel    -- ^ PC-relative relocation
154 |                | RelocAbsolute -- ^ Absolute address
155 |    deriving (Show)
156 | 
157 | -- | Relocation entry
158 | data Reloc = Reloc { offset :: Int,
159 |              -- ^ offset in code block which needs relocation
160 |                      kind :: RelocKind,
161 |              -- ^ kind of relocation
162 |                      address :: FunPtr ()
163 |              -- ^ target address
164 |            }
165 |    deriving (Show)
166 | 
167 | -- | Label
168 | data Label = Label Int String
169 |            deriving (Eq, Ord)
170 | 
171 | unCg :: CodeGen e s a -> ((e, CodeGenEnv) -> (s, CodeGenState) -> IO ((s, CodeGenState), Either ErrMsg a))
172 | unCg (CodeGen a) = a
173 | 
174 | instance Functor (CodeGen e s) where
175 |   fmap f m = CodeGen (\ env state -> do
176 |                          r <- unCg m env state
177 |                          case r of
178 |                            (state', Left err) -> return (state', Left err)
179 |                            (state', Right v) -> return (state', Right $ f v))
180 | 
181 | instance Applicative (CodeGen e s) where
182 |   pure x = cgReturn x
183 |   f <*> x = do
184 |     f' <- f
185 |     x' <- x
186 |     return $ f' x'
187 | 
188 | instance Monad (CodeGen e s) where
189 |     return x = cgReturn x
190 |     m >>= k = cgBind m k
191 | 
192 | instance MonadFail (CodeGen e s) where
193 |     fail err = cgFail err
194 | 
195 | cgReturn :: a -> CodeGen e s a
196 | cgReturn x = CodeGen (\_env state -> return (state, Right x))
197 | 
198 | cgFail :: String -> CodeGen e s a
199 | cgFail err = CodeGen (\_env state -> return (state, Left (text err)))
200 | 
201 | cgBind :: CodeGen e s a -> (a -> CodeGen e s a1) -> CodeGen e s a1
202 | cgBind m k = CodeGen (\env state ->
203 |                do r1 <- unCg m env state
204 |                   case r1 of
205 |                     (state', Left err) -> return (state', Left err)
206 |                     (state', Right v) -> unCg (k v) env state')
207 | 
208 | -- | Abort code generation with the given error message.
209 | failCodeGen :: Doc -> CodeGen e s a
210 | failCodeGen d = CodeGen (\_env state -> return (state, Left d))
211 | 
212 | instance MonadIO (CodeGen e s) where
213 |   liftIO st = CodeGen (\_env state -> do { r <- st; return (state, Right r) })
214 | 
215 | emptyCodeGenState :: CodeGenState
216 | emptyCodeGenState = CodeGenState { buffer = undefined,
217 |                                    bufferList = [],
218 |                                    firstBuffer = undefined,
219 |                                    bufferOfs = 0,
220 |                                    bufferSize = 0,
221 |                                    relocEntries = [],
222 |                                    nextLabel = 0,
223 |                                    definedLabels = Map.empty,
224 |                                    pendingFixups = Map.empty,
225 |                                    config = defaultCodeGenConfig}
226 | 
227 | -- | Default code generation configuration.  The code buffer size is
228 | -- set to 4KB, and code buffer management is automatic.  This value is
229 | -- intended to be used with record update syntax, for example:
230 | --
231 | -- >  runCodeGenWithConfig ... defaultCodeGenConfig{codeBufferSize = 128} ...
232 | defaultCodeGenConfig :: CodeGenConfig
233 | defaultCodeGenConfig = CodeGenConfig { codeBufferSize = defaultCodeBufferSize,
234 |                                        customCodeBuffer = Nothing }
235 | 
236 | defaultCodeBufferSize :: Int
237 | defaultCodeBufferSize = 4096
238 | 
239 | -- | Execute code generation, given a user environment and state.  The
240 | -- result is a tuple of the resulting user state and either an error
241 | -- message (when code generation failed) or the result of the code
242 | -- generation.  This function runs 'runCodeGenWithConfig' with a
243 | -- sensible default configuration.
244 | runCodeGen :: CodeGen e s a -> e -> s -> IO (s, Either ErrMsg a)
245 | runCodeGen cg uenv ustate =
246 |     runCodeGenWithConfig cg uenv ustate defaultCodeGenConfig
247 | 
248 | foreign import ccall "static stdlib.h"
249 |   memalign :: CUInt -> CUInt -> IO (Ptr a)
250 | 
251 | foreign import ccall "static sys/mman.h"
252 |   mprotect :: CUInt -> CUInt -> Int -> IO Int
253 | 
254 | -- | Like 'runCodeGen', but allows more control over the code
255 | -- generation process.  In addition to a code generator and a user
256 | -- environment and state, a code generation configuration must be
257 | -- provided.  A code generation configuration allows control over the
258 | -- allocation of code buffers, for example.
259 | runCodeGenWithConfig :: CodeGen e s a -> e -> s -> CodeGenConfig -> IO (s, Either ErrMsg a)
260 | runCodeGenWithConfig (CodeGen cg) uenv ustate conf =
261 |     do (buf, sze) <- case customCodeBuffer conf of
262 |                        Nothing -> do let initSize = codeBufferSize conf
263 |                                      let size = fromIntegral initSize
264 |                                      arr <- memalign 0x1000 size
265 |                                      -- 0x7 = PROT_{READ,WRITE,EXEC}
266 |                                      _ <- mprotect (fromIntegral $ ptrToIntPtr arr) size 0x7
267 |                                      return (arr, initSize)
268 |                        Just (buf, sze) -> return (buf, sze)
269 |        let env = CodeGenEnv {tailContext = True}
270 |        let state = emptyCodeGenState{buffer = buf,
271 |                                      bufferList = [],
272 |                                      firstBuffer = buf,
273 |                                      bufferSize = sze,
274 |                                      config = conf}
275 |        ((ustate', _), res) <- cg (uenv, env) (ustate, state)
276 |        return (ustate', res)
277 | 
278 | -- | Check whether the code buffer has room for at least the given
279 | -- number of bytes.  This should be called by code generators
280 | -- whenever it cannot be guaranteed that the code buffer is large
281 | -- enough to hold all the generated code.  Lets the code generation
282 | -- monad fail when the buffer overflows.
283 | --
284 | -- /Note:/ Starting with version 0.4, Harpy automatically checks for
285 | -- buffer overflow, so you do not need to call this function anymore.
286 | checkBufferSize :: Int -> CodeGen e s ()
287 | checkBufferSize needed =
288 |     do state <- getInternalState
289 |        unless (bufferOfs state + needed <= bufferSize state)
290 |               (failCodeGen (text "code generation buffer overflow: needed additional" <+>
291 |                             int needed <+> text "bytes (offset =" <+>
292 |                             int (bufferOfs state) <>
293 |                             text ", buffer size =" <+>
294 |                             int (bufferSize state) <> text ")"))
295 | 
296 | -- | Make sure that the code buffer has room for at least the given
297 | -- number of bytes.  This should be called by code generators whenever
298 | -- it cannot be guaranteed that the code buffer is large enough to
299 | -- hold all the generated code.  Creates a new buffer and places a
300 | -- jump to the new buffer when there is not sufficient space
301 | -- available.  When code generation was invoked with a pre-defined
302 | -- code buffer, code generation is aborted on overflow.
303 | --
304 | -- /Note:/ Starting with version 0.4, Harpy automatically checks for
305 | -- buffer overflow, so you do not need to call this function anymore.
306 | ensureBufferSize :: Int -> CodeGen e s ()
307 | ensureBufferSize needed =
308 |     do state <- getInternalState
309 |        case (customCodeBuffer (config state)) of
310 |          Nothing ->
311 |              unless (bufferOfs state + needed + 5 <= bufferSize state)
312 |                         (do let incrSize = max (needed + 16) (codeBufferSize (config state))
313 |                             arr <- liftIO $ mallocBytes incrSize
314 |                             ofs <- getCodeOffset
315 |                             let buf = buffer state
316 |                                 disp :: Int
317 |                                 disp = arr `minusPtr` (buf `plusPtr` ofs) - 5
318 |                             emit8 0xe9    -- FIXME: Machine dependent!
319 |                             emit32 (fromIntegral disp)
320 |                             st <- getInternalState
321 |                             setInternalState st{buffer = arr, bufferList = bufferList st ++ [(buffer st, bufferOfs st)], bufferOfs = 0})
322 |          Just (_, _) -> checkBufferSize needed
323 | 
324 | -- | Return a pointer to the beginning of the first code buffer, which
325 | -- is normally the entry point to the generated code.
326 | getEntryPoint :: CodeGen e s (Ptr Word8)
327 | getEntryPoint =
328 |     CodeGen (\ _ (ustate, state) ->
329 |       return $ ((ustate, state), Right (firstBuffer state)))
330 | 
331 | -- | Return the current offset in the code buffer, e.g. the offset
332 | -- at which the next instruction will be emitted.
333 | getCodeOffset :: CodeGen e s Int
334 | getCodeOffset =
335 |     CodeGen (\ _ (ustate, state) ->
336 |       return $ ((ustate, state), Right (bufferOfs state)))
337 | 
338 | -- | Set the user state to the given value.
339 | setState :: s -> CodeGen e s ()
340 | setState st =
341 |     CodeGen (\ _ (_, state) ->
342 |       return $ ((st, state), Right ()))
343 | 
344 | -- | Return the current user state.
345 | getState :: CodeGen e s s
346 | getState =
347 |     CodeGen (\ _ (ustate, state) ->
348 |       return $ ((ustate, state), Right (ustate)))
349 | 
350 | -- | Return the current user environment.
351 | getEnv :: CodeGen e s e
352 | getEnv =
353 |     CodeGen (\ (uenv, _) state ->
354 |       return $ (state, Right uenv))
355 | 
356 | -- | Set the environment to the given value and execute the given
357 | -- code generation in this environment.
358 | withEnv :: e -> CodeGen e s r -> CodeGen e s r
359 | withEnv e (CodeGen cg) =
360 |     CodeGen (\ (_, env) state ->
361 |       cg (e, env) state)
362 | 
363 | -- | Set the user state to the given value.
364 | setInternalState :: CodeGenState -> CodeGen e s ()
365 | setInternalState st =
366 |     CodeGen (\ _ (ustate, _) ->
367 |       return $ ((ustate, st), Right ()))
368 | 
369 | -- | Return the current user state.
370 | getInternalState :: CodeGen e s CodeGenState
371 | getInternalState =
372 |     CodeGen (\ _ (ustate, state) ->
373 |       return $ ((ustate, state), Right (state)))
374 | 
375 | -- | Return the pointer to the start of the code buffer.
376 | getBasePtr :: CodeGen e s (Ptr Word8)
377 | getBasePtr =
378 |     CodeGen (\ _ (ustate, state) ->
379 |       return $ ((ustate, state), Right (buffer state)))
380 | 
381 | -- | Return a list of all code buffers and their respective size
382 | -- (i.e., actually used space for code, not allocated size).
383 | getCodeBufferList :: CodeGen e s [(Ptr Word8, Int)]
384 | getCodeBufferList = do st <- getInternalState
385 |                        return $ bufferList st ++ [(buffer st, bufferOfs st)]
386 | 
387 | -- | Generate a new label to be used with the label operations
388 | -- 'emitFixup' and 'defineLabel'.
389 | newLabel :: CodeGen e s Label
390 | newLabel =
391 |     do state <- getInternalState
392 |        let lab = nextLabel state
393 |        setInternalState state{nextLabel = lab + 1}
394 |        return (Label lab "")
395 | 
396 | -- | Generate a new label to be used with the label operations
397 | -- 'emitFixup' and 'defineLabel'.  The given name is used for
398 | -- diagnostic purposes, and will appear in the disassembly.
399 | newNamedLabel :: String -> CodeGen e s Label
400 | newNamedLabel name =
401 |     do state <- getInternalState
402 |        let lab = nextLabel state
403 |        setInternalState state{nextLabel = lab + 1}
404 |        return (Label lab name)
405 | 
406 | -- | Generate a new label and define it at once
407 | setLabel :: CodeGen e s Label
408 | setLabel =
409 |     do l <- newLabel
410 |        defineLabel l
411 |        return l
412 | 
413 | -- | Emit a relocation entry for the given offset, relocation kind
414 | -- and target address.
415 | emitRelocInfo :: Int -> RelocKind -> FunPtr a -> CodeGen e s ()
416 | emitRelocInfo ofs knd addr =
417 |     do state <- getInternalState
418 |        setInternalState state{relocEntries =
419 |                               Reloc{offset = ofs,
420 |                                     kind = knd,
421 |                                     address = castFunPtr addr} :
422 |                               (relocEntries state)}
423 | 
424 | -- | Emit a byte value to the code buffer.
425 | emit8 :: Word8 -> CodeGen e s ()
426 | emit8 op =
427 |     CodeGen (\ _ (ustate, state) ->
428 |       do let buf = buffer state
429 |              ptr = bufferOfs state
430 |          pokeByteOff buf ptr op
431 |          return $ ((ustate, state{bufferOfs = ptr + 1}), Right ()))
432 | 
433 | -- | Store a byte value at the given offset into the code buffer.
434 | emit8At :: Int -> Word8 -> CodeGen e s ()
435 | emit8At pos op =
436 |     CodeGen (\ _ (ustate, state) ->
437 |       do let buf = buffer state
438 |          pokeByteOff buf pos op
439 |          return $ ((ustate, state), Right ()))
440 | 
441 | -- | Return the byte value at the given offset in the code buffer.
442 | peek8At :: Int -> CodeGen e s Word8
443 | peek8At pos =
444 |     CodeGen (\ _ (ustate, state) ->
445 |       do let buf = buffer state
446 |          b <- peekByteOff buf pos
447 |          return $ ((ustate, state), Right b))
448 | 
449 | -- | Like 'emit8', but for a 32-bit value.
450 | emit32 :: Word32 -> CodeGen e s ()
451 | emit32 op =
452 |     CodeGen (\ _ (ustate, state) ->
453 |       do let buf = buffer state
454 |              ptr = bufferOfs state
455 |          pokeByteOff buf ptr op
456 |          return $ ((ustate, state{bufferOfs = ptr + 4}), Right ()))
457 | 
458 | -- | Like 'emit8At', but for a 32-bit value.
459 | emit32At :: Int -> Word32 -> CodeGen e s ()
460 | emit32At pos op =
461 |     CodeGen (\ _ (ustate, state) ->
462 |       do let buf = buffer state
463 |          pokeByteOff buf pos op
464 |          return $ ((ustate, state), Right ()))
465 | 
466 | -- | Emit a label at the current offset in the code buffer.  All
467 | -- references to the label will be relocated to this offset.
468 | defineLabel :: Label -> CodeGen e s ()
469 | defineLabel (Label lab name) =
470 |     do state <- getInternalState
471 |        case Map.lookup lab (definedLabels state) of
472 |          Just _ -> failCodeGen $ text "duplicate definition of label" <+>
473 |                      int lab
474 |          _ -> return ()
475 |        case Map.lookup lab (pendingFixups state) of
476 |          Just fixups -> do mapM_ (performFixup (buffer state) (bufferOfs state)) fixups
477 |                            setInternalState state{pendingFixups = Map.delete lab (pendingFixups state)}
478 |          Nothing -> return ()
479 |        state1 <- getInternalState
480 |        setInternalState state1{definedLabels = Map.insert lab (buffer state1, bufferOfs state1, name) (definedLabels state1)}
481 | 
482 | performFixup :: Ptr Word8 -> Int -> FixupEntry -> CodeGen e s ()
483 | performFixup labBuf labOfs (FixupEntry{fueBuffer = buf, fueOfs = ofs, fueKind = knd}) =
484 |     do let diff = (labBuf `plusPtr` labOfs) `minusPtr` (buf `plusPtr` ofs)
485 |        liftIO $ case knd of
486 |                   Fixup8  -> pokeByteOff buf ofs (fromIntegral diff - 1 :: Word8)
487 |                   Fixup16 -> pokeByteOff buf ofs (fromIntegral diff - 2 :: Word16)
488 |                   Fixup32 -> pokeByteOff buf ofs (fromIntegral diff - 4 :: Word32)
489 |                   Fixup32Absolute -> pokeByteOff buf ofs (fromIntegral (ptrToWordPtr (labBuf `plusPtr` labOfs)) :: Word32)
490 |        return ()
491 | 
492 | 
493 | -- | This operator gives neat syntax for defining labels.  When @l@ is a label, the code
494 | --
495 | -- > l @@ mov eax ebx
496 | --
497 | -- associates the label l with the following @mov@ instruction.
498 | (@@) :: Label -> CodeGen e s a -> CodeGen e s a
499 | (@@) lab gen = do defineLabel lab
500 |                   gen
501 | 
502 | -- | Emit a fixup entry for the given label at the current offset in
503 | -- the code buffer (unless the label is already defined).
504 | -- The instruction at this offset will
505 | -- be patched to target the address associated with this label when
506 | -- it is defined later.
507 | emitFixup :: Label -> Int -> FixupKind -> CodeGen e s ()
508 | emitFixup (Label lab _) ofs knd =
509 |     do state <- getInternalState
510 |        let base = buffer state
511 |            ptr = bufferOfs state
512 |            fue = FixupEntry{fueBuffer = base,
513 |                             fueOfs = ptr + ofs,
514 |                             fueKind = knd}
515 |        case Map.lookup lab (definedLabels state) of
516 |          Just (labBuf, labOfs, _) -> performFixup labBuf labOfs fue
517 |          Nothing -> setInternalState state{pendingFixups = Map.insertWith (++) lab [fue] (pendingFixups state)}
518 | 
519 | -- | Return the address of a label, fail if the label is not yet defined.
520 | labelAddress :: Label -> CodeGen e s (Ptr a)
521 | labelAddress (Label lab name) = do
522 |   state <- getInternalState
523 |   case Map.lookup lab (definedLabels state) of
524 |     Just (labBuf, labOfs, _) -> return $ plusPtr labBuf labOfs
525 |     Nothing -> fail $ "Label " ++ show lab ++ "(" ++ name ++ ") not yet defined"
526 | 
527 | 
528 | -- | Disassemble all code buffers.  The result is a list of
529 | -- disassembled instructions which can be converted to strings using
530 | -- the 'Dis.showIntel' or 'Dis.showAtt' functions from module
531 | -- "Harpy.X86Disassembler".
532 | disassemble :: CodeGen e s [Dis.Instruction]
533 | disassemble = do
534 |   s <- getInternalState
535 |   let buffers = bufferList s
536 |   r <- mapM (\ (buff, len) -> do
537 |              r <- liftIO $ Dis.disassembleBlock buff len
538 |              case r of
539 |                     Left err -> cgFail $ show err
540 |                     Right instr -> return instr
541 |             ) $ buffers ++ [(buffer s, bufferOfs s)]
542 |   r' <- insertLabels (concat r)
543 |   return r'
544 |  where insertLabels :: [Dis.Instruction] -> CodeGen e s [Dis.Instruction]
545 |        insertLabels = liftM concat . mapM ins
546 |        ins :: Dis.Instruction -> CodeGen e s [Dis.Instruction]
547 |        ins i@(Dis.BadInstruction{}) = return [i]
548 |        ins i@(Dis.PseudoInstruction{}) = return [i]
549 |        ins i@(Dis.Instruction{Dis.address = addr}) =
550 |            do state <- getInternalState
551 |               let allLabs = Map.toList (definedLabels state)
552 |                   labs = filter (\ (_, (buf, ofs, _)) -> fromIntegral (ptrToWordPtr (buf `plusPtr` ofs)) == addr) allLabs
553 |                   createLabel (l, (buf, ofs, name)) = Dis.PseudoInstruction addr
554 |                                                         (case name of
555 |                                                            "" ->
556 |                                                                "label " ++ show l ++
557 |                                                                 " [" ++
558 |                                                                 hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
559 |                                                                 "]"
560 |                                                            _ -> name ++ ": [" ++
561 |                                                                   hex32 (fromIntegral (ptrToWordPtr (buf `plusPtr` ofs))) ++
562 |                                                                   "]")
563 |               return $ fmap createLabel labs ++ [i]
564 |        hex32 :: Int -> String
565 |        hex32 i =
566 |               let w :: Word32
567 |                   w = fromIntegral i
568 |                   s = showHex w ""
569 |               in take (8 - length s) (repeat '0') ++ s
570 | 
571 | #ifndef __HADDOCK__
572 | 
573 | callDecl :: String -> Q Type -> Q [Dec]
574 | callDecl ns qt =  do
575 |     t0 <- qt
576 |     let (tvars, cxt, t) = case t0 of
577 |                          ForallT vs c t' -> (vs, c, t')
578 |                          _ -> ([], [], t0)
579 |     let name = mkName ns
580 |     let funptr = AppT (ConT $ mkName "FunPtr") t
581 |     let ioresult = t -- addIO t
582 |     let ty = AppT (AppT ArrowT funptr) ioresult
583 |     dynName <- newName "conv"
584 |     let dyn = ForeignD $ ImportF CCall Safe "dynamic" dynName $ ForallT tvars cxt ty
585 |     vs <- mkArgs t
586 |     cbody <- [| CodeGen (\env (ustate, state) ->
587 |                         do let code = firstBuffer state
588 |                            res <- liftIO $ $(do
589 |                                              c <- newName "c"
590 |                                              cast <- [|castPtrToFunPtr|]
591 |                                              let f = AppE (VarE dynName)
592 |                                                           (AppE cast
593 |                                                                 (VarE c))
594 |                                              return $ LamE [VarP c] $ foldl AppE f $ map VarE vs
595 |                                             ) code
596 |                            return $ ((ustate, state), Right res))|]
597 |     let call = ValD (VarP name) (NormalB $ LamE (map VarP vs) cbody) []
598 |     return [ dyn, call ]
599 | 
600 | mkArgs (AppT (AppT ArrowT _from) to) = do
601 |   v  <- newName "v"
602 |   vs <- mkArgs to
603 |   return $ v : vs
604 | mkArgs _ = return []
605 | 
606 | addIO (AppT t@(AppT ArrowT _from) to) = AppT t $ addIO to
607 | addIO t = AppT (ConT $ mkName "IO") t
608 | 
609 | #else
610 | 
611 | -- | Declare a stub function to call the code buffer. Arguments are the name
612 | -- of the generated function, and the type the code buffer is supposed to have.
613 | -- The type argument can be given using the [t| ... |] notation of Template Haskell.
614 | -- Allowed types are the legal types for FFI functions.
615 | callDecl :: String -> Q Type -> Q [Dec]
616 | 
617 | #endif
618 | 


--------------------------------------------------------------------------------
/Harpy/X86CGCombinators.hs:
--------------------------------------------------------------------------------
  1 | --------------------------------------------------------------------------
  2 | -- |
  3 | -- Module      :  X86CodeGen
  4 | -- Copyright   :  (c) 2006-2015 Martin Grabmueller and Dirk Kleeblatt
  5 | -- License     :  BSD3
  6 | --
  7 | -- Maintainer  :  martin@grabmueller.de
  8 | -- Stability   :  quite experimental
  9 | -- Portability :  portable (but generated code non-portable)
 10 | --
 11 | -- This module exports several combinators for writing loops,
 12 | -- conditionals and function prolog\/epilog code.
 13 | --
 14 | -- Note: this module is under heavy development and the exported API
 15 | -- is definitely not yet stable.
 16 | --------------------------------------------------------------------------
 17 | 
 18 | module Harpy.X86CGCombinators(
 19 |   -- * Types
 20 |   UserState(..),
 21 |   UserEnv(..),
 22 |   emptyUserEnv,
 23 |   emptyUserState,
 24 |   CtrlDest(..),
 25 |   DataDest(..),
 26 |   -- * Combinators
 27 |   ifThenElse,
 28 |   doWhile,
 29 |   continue,
 30 |   continueBranch,
 31 |   saveRegs,
 32 |   function,
 33 |   withDataDest,
 34 |   withCtrlDest,
 35 |   withDest,
 36 |    ) where
 37 | 
 38 | import Text.PrettyPrint.HughesPJ
 39 | 
 40 | import Foreign
 41 | import Data.Word
 42 | 
 43 | import Harpy.CodeGenMonad
 44 | import Harpy.X86CodeGen
 45 | import Harpy.X86Assembler
 46 | 
 47 | -- | Destination for a calculated value.
 48 | data DataDest = RegDest Reg32            -- ^ Store into specific register
 49 |               | StackDest                -- ^ Push onto stack
 50 |               | MemBaseDest Reg32 Word32 -- ^ Store at memory address
 51 |               | Ignore                   -- ^ Throw result away.
 52 | 
 53 | -- | Destination for control transfers
 54 | data CtrlDest = FallThrough              -- ^ Go to next instruction
 55 |               | Return                   -- ^ Return from current functio
 56 |               | Goto Label               -- ^ Go to specific label
 57 |               | Branch CtrlDest CtrlDest -- ^ Go to one of the given labels
 58 |                                          -- depending on outcome of test
 59 | 
 60 | -- | User state is used to maintain bitmask of registers currently in use.
 61 | data UserState = UserState {}
 62 | 
 63 | 
 64 | -- | User environment stores code generators for accessing specific
 65 | -- variables as well as the current data and control destinations
 66 | data UserEnv = UserEnv { bindings :: [(String, CodeGen UserEnv UserState ())],
 67 |                          dataDest :: DataDest,
 68 |                          ctrlDest :: CtrlDest }
 69 | 
 70 | emptyUserState :: UserState
 71 | emptyUserState = UserState{}
 72 | 
 73 | emptyUserEnv :: UserEnv
 74 | emptyUserEnv = UserEnv{bindings = [], dataDest = Ignore,
 75 |                        ctrlDest = Return}
 76 | 
 77 | ifThenElse :: CodeGen UserEnv s r
 78 | 	      -> CodeGen UserEnv s a
 79 | 	      -> CodeGen UserEnv s a1
 80 | 	      -> CodeGen UserEnv s ()
 81 | ifThenElse condCg thenCg elseCg =
 82 |     do env <- getEnv
 83 |        elseLabel <- newLabel
 84 |        endLabel <- newLabel
 85 |        withDest Ignore (Branch FallThrough (Goto elseLabel)) condCg
 86 |        withCtrlDest (case ctrlDest env of
 87 |                        FallThrough -> Goto endLabel
 88 |                        _ -> ctrlDest env)
 89 |                     (thenCg >> continue)
 90 |        elseLabel @@ (elseCg >> continue)
 91 |        endLabel @@ return ()
 92 | 
 93 | doWhile :: CodeGen UserEnv s r -> CodeGen UserEnv s a -> CodeGen UserEnv s ()
 94 | doWhile condCg bodyCg =
 95 |     do topLabel <- newLabel
 96 |        testLabel <- newLabel
 97 |        jmp testLabel
 98 |        topLabel @@ withCtrlDest FallThrough (bodyCg >> continue)
 99 |        testLabel @@ withDest Ignore (Branch (Goto topLabel) FallThrough)
100 |                         condCg
101 |        continue
102 | 
103 | doFor :: (Mov a Word32, Add a Word32, Cmp a Word32) => a -> Word32 -> Word32 -> Int32 ->
104 |          CodeGen UserEnv s r ->
105 |          CodeGen UserEnv s ()
106 | doFor loc from to step body =
107 |     do topLabel <- newLabel
108 |        testLabel <- newLabel
109 |        mov loc from
110 |        jmp testLabel
111 |        topLabel @@ withCtrlDest FallThrough  (body >> continue)
112 |        testLabel @@ cmp loc to
113 |        add loc (fromIntegral step :: Word32)
114 |        if step < 0
115 |           then jge topLabel
116 |           else jle topLabel
117 |        continue
118 | 
119 | 
120 | continue :: CodeGen UserEnv s ()
121 | continue =
122 |     do env <- getEnv
123 |        cont (ctrlDest env)
124 |   where
125 |   cont FallThrough = return ()
126 |   cont (Goto l) = jmp l
127 |   cont (Branch _ _) = error "Branch in continue"
128 |   cont Return = x86_epilog 0
129 | 
130 | 
131 | continueBranch :: Int -> Bool -> CodeGen UserEnv s ()
132 | continueBranch cc isSigned =
133 |     do env <- getEnv
134 |        let Branch c1 c2 = ctrlDest env
135 |        cont cc isSigned c1 c2
136 |   where
137 |   cont cc isSigned (Goto l1) (Goto l2) =
138 |     do x86_branch32 cc 0 isSigned
139 |        emitFixup l1 (-4) Fixup32
140 |        x86_branch32 (negateCC cc) 0 isSigned
141 |        emitFixup l2 (-4) Fixup32
142 |   cont cc isSigned (Goto l1) FallThrough =
143 |     do x86_branch32 cc 0 isSigned
144 |        emitFixup l1 (-4) Fixup32
145 |   cont cc isSigned FallThrough (Goto l2) =
146 |     do x86_branch32 (negateCC cc) 0 isSigned
147 |        emitFixup l2 (-4) Fixup32
148 |   cont cc isSigned (Goto l1) Return =
149 |     do x86_branch32 cc 0 isSigned
150 |        emitFixup l1 (-4) Fixup32
151 |        withCtrlDest Return continue
152 |   cont cc isSigned Return (Goto l2) =
153 |     do x86_branch32 (negateCC cc) 0 isSigned
154 |        emitFixup l2 (-4) Fixup32
155 |        withCtrlDest Return continue
156 |   cont _ _ _ _ = error "unhandled case in continueBranch"
157 | 
158 | reg sreg =
159 |     do env <- getEnv
160 |        reg' sreg (dataDest env)
161 |   where
162 |   reg' sreg (RegDest r) =
163 |     do if sreg /= r
164 |           then mov r sreg
165 |           else return ()
166 |   reg' sreg (StackDest) =
167 |     do push sreg
168 |   reg' sreg (MemBaseDest r offset) =
169 |     do mov (Disp offset, r) sreg
170 |   reg' sreg Ignore = return ()
171 | 
172 | membase reg ofs =
173 |     do env <- getEnv
174 |        membase' reg ofs (dataDest env)
175 |   where
176 |   membase' reg ofs (RegDest r) =
177 |     do mov r (Disp ofs, reg)
178 |   membase' reg ofs (StackDest) =
179 |     do push (Disp ofs, reg)
180 |   membase' reg ofs (MemBaseDest r offset) =
181 |     do push edi
182 |        mov edi (Disp ofs, reg)
183 |        mov (Disp offset, r) edi
184 |        pop edi
185 |   membase' reg ofs Ignore = return ()
186 | 
187 | global ofs =
188 |     do env <- getEnv
189 |        global' ofs (dataDest env)
190 |   where
191 |   global' ofs (RegDest r) =
192 |     do mov r (Addr ofs)
193 |   global' ofs (StackDest) =
194 |     do push (Addr ofs)
195 |   global' ofs (MemBaseDest r offset) =
196 |     do push edi
197 |        mov edi (Addr ofs)
198 |        mov (Disp offset, r) edi
199 |        pop edi
200 |   global' ofs Ignore = return ()
201 | 
202 | immediate value =
203 |     do env <- getEnv
204 |        immediate' value (dataDest env)
205 |   where
206 |   immediate' value (RegDest r) =
207 |     do mov r value
208 |   immediate' value (StackDest) =
209 |     do x86_push_imm value
210 |   immediate' value (MemBaseDest r offset) =
211 |     do push edi
212 |        mov edi value
213 |        mov (Disp offset, r) edi
214 |        pop edi
215 |   immediate' ofs Ignore = return ()
216 | 
217 | -- | Save a number of registers on the stack, perform the given code
218 | -- generation, and restore the registers.
219 | saveRegs :: [Reg32] -> CodeGen UserEnv s r -> CodeGen UserEnv s ()
220 | saveRegs [] cg = cg >> return ()
221 | saveRegs regs cg =
222 |     do gen_push regs
223 |        withCtrlDest FallThrough cg
224 |        gen_pop regs
225 |        continue
226 |   where
227 |   gen_push [] = return ()
228 |   gen_push (r:regs) = push r >> gen_push regs
229 |   gen_pop [] = return ()
230 |   gen_pop (r:regs) = gen_pop regs >> pop r
231 | 
232 | -- | Perform the code generation associated with the variable given.
233 | loadVar :: String -> CodeGen UserEnv UserState ()
234 | loadVar name =
235 |     do UserEnv{bindings = assoc} <- getEnv
236 |        case lookup name assoc of
237 |          Just cg -> cg
238 |          Nothing -> failCodeGen (text ("undefined variable: " ++ name))
239 | 
240 | -- | Set the data destinations to the given values while
241 | -- running the code generator.
242 | withDataDest :: DataDest -> CodeGen UserEnv s r -> CodeGen UserEnv s r
243 | withDataDest ddest cg =
244 |     do env <- getEnv
245 |        withEnv (env{dataDest = ddest}) cg
246 | 
247 | -- | Set the control destinations to the given values while
248 | -- running the code generator.
249 | withCtrlDest :: CtrlDest -> CodeGen UserEnv s r -> CodeGen UserEnv s r
250 | withCtrlDest cdest cg =
251 |     do env <- getEnv
252 |        withEnv (env{ctrlDest = cdest}) cg
253 | 
254 | -- | Set the data and control destinations to the given values while
255 | -- running the code generator.
256 | withDest :: DataDest -> CtrlDest -> CodeGen UserEnv s r -> CodeGen UserEnv s r
257 | withDest ddest cdest cg =
258 |     do env <- getEnv
259 |        withEnv (env{dataDest = ddest, ctrlDest = cdest}) cg
260 | 
261 | -- | Emit the necessary function prolog and epilog code and invoke the
262 | -- given code generator for the code inbetween.
263 | function :: CodeGen UserEnv s r -> CodeGen UserEnv s r
264 | function cg =
265 |     do x86_prolog 0 0
266 |        withDest (RegDest eax) Return $ cg
267 | 


--------------------------------------------------------------------------------
/Harpy/X86CodeGen.hs:
--------------------------------------------------------------------------------
   1 | --------------------------------------------------------------------------
   2 | -- |
   3 | -- Module      :  X86CodeGen
   4 | -- Copyright   :  (c) 2006-2015 Martin Grabmueller and Dirk Kleeblatt
   5 | -- License     :  BSD3
   6 | --
   7 | -- Maintainer  :  martin@grabmueller.de
   8 | -- Stability   :  provisional
   9 | -- Portability :  portable (but generated code non-portable)
  10 | --
  11 | -- Functions for generating x86 machine code instructions.  The
  12 | -- functions make use of the code generation monad in module
  13 | -- "Harpy.CodeGenMonad" for emitting binary code into a code buffer.
  14 | --
  15 | -- This module is very low-level, since there are different
  16 | -- functions for different addressing modes.  A more convenient
  17 | -- interface is provided in module "Harpy.X86Assembler", which uses
  18 | -- the operand types to determine the correct addressing modes for
  19 | -- all supported instructions.
  20 | --
  21 | -- Note: this file does not (yet) provide the complete x86
  22 | -- instruction set, not even all user-mode instructions.  For some
  23 | -- operations, some addressing modes are missing as well.
  24 | --
  25 | -- Copyright notice:
  26 | --
  27 | -- The information in this file is based on the header file
  28 | -- x86-codegen.h from the mono distribution, which has the following
  29 | -- copyright information:
  30 | --
  31 | -- @
  32 | --  * x86-codegen.h: Macros for generating x86 code
  33 | --  *
  34 | --  * Authors:
  35 | --  *   Paolo Molaro (lupus\@ximian.com)
  36 | --  *   Intel Corporation (ORP Project)
  37 | --  *   Sergey Chaban (serge\@wildwestsoftware.com)
  38 | --  *   Dietmar Maurer (dietmar\@ximian.com)
  39 | --  *   Patrik Torstensson
  40 | --  *
  41 | --  * Copyright (C)  2000 Intel Corporation.  All rights reserved.
  42 | --  * Copyright (C)  2001, 2002 Ximian, Inc.
  43 | --  *
  44 | -- @
  45 | --------------------------------------------------------------------------
  46 | 
  47 | module Harpy.X86CodeGen(
  48 |     -- * Types
  49 |     X86_SSE_PFX,
  50 |     -- * Constants
  51 |     -- ** Machine characteristics
  52 |     -- |  Sizes of various machine data types in bytes.
  53 |     x86_dword_size,
  54 |     x86_qword_size,
  55 |     x86_max_instruction_bytes,
  56 |     -- ** Register numbers
  57 |     -- | x86 general-purpose register numbers
  58 |     x86_eax, x86_ecx, x86_edx, x86_ebx, x86_esp, x86_ebp, x86_esi, x86_edi,
  59 |     x86_nobasereg,
  60 |     -- ** Register masks and predicates
  61 |     -- | Bitvector masks for general-purpose registers
  62 |     x86_eax_mask, x86_ecx_mask, x86_edx_mask, x86_ebx_mask,
  63 |     x86_esi_mask, x86_edi_mask, x86_ebp_mask,
  64 |     x86_callee_regs, x86_caller_regs, x86_byte_regs,
  65 |     -- ** ALU operations
  66 |     -- | Opcodes for ALU instructions
  67 |     x86_add, x86_or, x86_adc, x86_sbb, x86_and, x86_sub, x86_xor, x86_cmp,
  68 |     -- ** Shift operations
  69 |     -- | Opcodes for shift instructions
  70 |     x86_rol, x86_ror, x86_rcl, x86_rcr, x86_shl,
  71 |     x86_shr, x86_sar, x86_shld, x86_shlr,
  72 |     -- ** FP operations
  73 |     -- | Opcodes for floating-point instructions
  74 |     x86_fadd, x86_fmul, x86_fcom, x86_fcomp, x86_fsub, x86_fsubr,
  75 |     x86_fdiv, x86_fdivr,
  76 |     -- ** FP conditions and control codes
  77 |     -- | FP status word codes
  78 |     x86_fp_c0, x86_fp_c1, x86_fp_c2, x86_fp_c3, x86_fp_cc_mask,
  79 |     -- | FP control word codes
  80 |     x86_fpcw_invopex_mask, x86_fpcw_denopex_mask, x86_fpcw_zerodiv_mask,
  81 |     x86_fpcw_ovfex_mask, x86_fpcw_undfex_mask, x86_fpcw_precex_mask,
  82 |     x86_fpcw_precc_mask, x86_fpcw_roundc_mask,
  83 |     x86_fpcw_prec_single, x86_fpcw_prec_double,
  84 |     x86_fpcw_prec_extended,
  85 |     x86_fpcw_round_nearest, x86_fpcw_round_down, x86_fpcw_round_up,
  86 |     x86_fpcw_round_tozero,
  87 |     -- ** Condition codes
  88 |     -- | Integer conditions codes
  89 |     x86_cc_eq, x86_cc_e, x86_cc_z,
  90 |     x86_cc_ne, x86_cc_nz,
  91 |     x86_cc_lt, x86_cc_b, x86_cc_c, x86_cc_nae, x86_cc_le, x86_cc_be,
  92 |     x86_cc_na, x86_cc_gt, x86_cc_a, x86_cc_nbe, x86_cc_ge, x86_cc_ae,
  93 |     x86_cc_nb, x86_cc_nc, x86_cc_lz, x86_cc_s, x86_cc_gez, x86_cc_ns,
  94 |     x86_cc_p, x86_cc_np, x86_cc_pe, x86_cc_po, x86_cc_o, x86_cc_no,
  95 |     -- ** Instruction prefix codes
  96 |     x86_lock_prefix, x86_repnz_prefix, x86_repz_prefix, x86_rep_prefix,
  97 |     x86_cs_prefix, x86_ss_prefix, x86_ds_prefix, x86_es_prefix,
  98 |     x86_fs_prefix, x86_gs_prefix, x86_unlikely_prefix,
  99 |     x86_likely_prefix, x86_operand_prefix, x86_address_prefix,
 100 |     -- * Functions
 101 |     -- ** Utility functions
 102 |     x86_is_scratch, x86_is_callee,
 103 |     -- ** Code emission
 104 |     -- | These functions are used to emit parts of instructions, such
 105 |     -- as constants or operand descriptions.
 106 |     x86_imm_emit16, x86_imm_emit8, x86_imm_emit32,
 107 |     x86_membase_emit, x86_alu_reg_imm,
 108 |     -- ** Call instructions
 109 |     x86_call_hs, x86_call_membase, x86_call_mem, x86_call_reg, x86_call_code,
 110 |     x86_call_imm,
 111 |     -- ** Function prologue and epilogue
 112 |     x86_prolog, x86_epilog, x86_enter, x86_leave,
 113 |     x86_ret, x86_ret_imm,
 114 |     -- ** Jump and branch
 115 |     x86_jecxz, x86_branch, x86_branch_pointer, x86_branch32, x86_branch8,
 116 |     x86_jump_membase, x86_jump_pointer, x86_jump_mem, x86_jump_reg,
 117 |     x86_jump32, x86_jump8,
 118 |     x86_loopne, x86_loope, x86_loop,
 119 |     -- ** Stack operations
 120 |     x86_push_reg, x86_push_regp, x86_push_mem, x86_push_membase,
 121 |     x86_push_imm, x86_push_imm_template, x86_push_memindex,
 122 |     x86_pop_membase, x86_pop_mem, x86_pop_reg,
 123 |     x86_popfd, x86_pushfd, x86_popad, x86_pushad,
 124 |     -- ** Data movement
 125 |     x86_mov_reg_reg, x86_mov_reg_imm, x86_mov_mem_imm, x86_mov_membase_imm,
 126 |     x86_mov_memindex_imm, x86_mov_mem_reg, x86_mov_reg_mem,
 127 |     x86_mov_regp_reg, x86_mov_reg_regp, x86_mov_membase_reg,
 128 |     x86_mov_reg_membase, x86_mov_memindex_reg, x86_mov_reg_memindex,
 129 |     -- ** Arithmetic
 130 |     x86_xadd_reg_reg, x86_xadd_mem_reg, x86_xadd_membase_reg,
 131 |     x86_inc_mem, x86_inc_membase, x86_inc_reg,
 132 |     x86_dec_mem, x86_dec_membase, x86_dec_reg,
 133 |     x86_not_mem, x86_not_membase, x86_not_reg,
 134 |     x86_neg_mem, x86_neg_membase, x86_neg_reg,
 135 |     x86_alu_mem_imm, x86_alu_membase_imm, x86_alu_membase8_imm,
 136 |     x86_alu_mem_reg, x86_alu_membase_reg, x86_alu_reg_reg,
 137 |     x86_alu_reg8_reg8, x86_alu_reg_mem, x86_alu_reg_membase,
 138 |     x86_mul_reg, x86_mul_mem, x86_mul_membase,
 139 |     x86_imul_reg_reg, x86_imul_reg_membase, x86_imul_reg_reg_imm,
 140 |     x86_imul_reg_mem,
 141 |     x86_imul_reg_mem_imm, x86_imul_reg_membase_imm,
 142 |     x86_div_reg, x86_div_mem, x86_div_membase,
 143 |     x86_test_reg_imm, x86_test_mem_imm, x86_test_membase_imm,
 144 |     x86_test_reg_reg, x86_test_mem_reg, x86_test_membase_reg,
 145 |     -- ** Exchange
 146 |     x86_cmpxchg_reg_reg, x86_cmpxchg_mem_reg, x86_cmpxchg_membase_reg,
 147 |     x86_xchg_reg_reg, x86_xchg_mem_reg, x86_xchg_membase_reg,
 148 |     -- ** String operations
 149 |     x86_stosb, x86_stosl, x86_stosd, x86_movsb, x86_movsl, x86_movsd,
 150 |     -- ** Bitwise shift
 151 |     x86_shift_reg_imm, x86_shift_mem_imm, x86_shift_membase_imm,
 152 |     x86_shift_reg, x86_shift_mem, x86_shift_membase,
 153 |     x86_shrd_reg, x86_shrd_reg_imm, x86_shld_reg, x86_shld_reg_imm,
 154 |     -- ** Conditional move
 155 |     x86_cmov_membase, x86_cmov_mem, x86_cmov_reg,
 156 |     -- ** Conditional set
 157 |     x86_set_membase, x86_set_mem, x86_set_reg,
 158 |     -- ** Address calculation
 159 |     x86_lea_mem, x86_lea_membase, x86_lea_memindex,
 160 |     -- ** Conversion
 161 |     x86_cdq,x86_widen_memindex, x86_widen_membase, x86_widen_mem,
 162 |     x86_widen_reg,
 163 |     -- ** Floating point
 164 |     x86_fp_op_mem, x86_fp_op_membase, x86_fp_op, x86_fp_op_reg,
 165 |     x86_fp_int_op_membase, x86_fstp, x86_fcompp, x86_fucompp,
 166 |     x86_fnstsw, x86_fnstcw, x86_fnstcw_membase,
 167 |     x86_fldcw, x86_fldcw_membase, x86_fchs,
 168 |     x86_frem, x86_fxch, x86_fcomi, x86_fcomip, x86_fucomi, x86_fucomip,
 169 |     x86_fld, x86_fld_membase, x86_fld80_mem, x86_fld80_membase,
 170 |     x86_fld_reg, x86_fldz, x86_fld1, x86_fldpi,
 171 |     x86_fst, x86_fst_membase, x86_fst80_mem, x86_fst80_membase,
 172 |     FIntSize(..),
 173 |     x86_fist_pop, x86_fist_pop_membase, x86_fstsw,
 174 |     x86_fist_membase, x86_fild, x86_fild_membase,
 175 |     x86_fsin, x86_fcos, x86_fabs, x86_ftst, x86_fxam, x86_fpatan,
 176 |     x86_fprem, x86_fprem1, x86_frndint, x86_fsqrt, x86_fptan,
 177 |     x86_fincstp, x86_fdecstp,
 178 |     -- ** SSE instructions
 179 |     x86_sse_ps, x86_sse_pd, x86_sse_ss, x86_sse_sd,
 180 |     x86_add_sse_reg_reg, x86_add_sse_reg_mem, x86_add_sse_reg_membase,
 181 |     x86_sub_sse_reg_reg, x86_sub_sse_reg_mem, x86_sub_sse_reg_membase,
 182 |     x86_mul_sse_reg_reg, x86_mul_sse_reg_mem, x86_mul_sse_reg_membase,
 183 |     x86_div_sse_reg_reg, x86_div_sse_reg_mem, x86_div_sse_reg_membase,
 184 |     x86_max_sse_reg_reg, x86_max_sse_reg_mem, x86_max_sse_reg_membase,
 185 |     x86_min_sse_reg_reg, x86_min_sse_reg_mem, x86_min_sse_reg_membase,
 186 |     x86_sqrt_sse_reg_reg, x86_sqrt_sse_reg_mem, x86_sqrt_sse_reg_membase,
 187 |     x86_mov_sse_reg_reg, x86_mov_sse_reg_mem, x86_mov_sse_reg_membase, x86_mov_sse_mem_reg ,x86_mov_sse_membase_reg,
 188 |     x86_ucomisd_reg_reg, x86_ucomisd_reg_mem, x86_ucomisd_reg_membase,
 189 |     x86_ucomiss_reg_reg, x86_ucomiss_reg_mem, x86_ucomiss_reg_membase,
 190 |     x86_comisd_reg_reg, x86_comisd_reg_mem, x86_comisd_reg_membase,
 191 |     x86_comiss_reg_reg, x86_comiss_reg_mem, x86_comiss_reg_membase,
 192 |     XMMReg(XMMReg), Mem(Mem), MemBase(MemBase),
 193 |     XMMLocation(xmm_location_emit),
 194 |     x86_movss_to_reg, x86_movss_from_reg,
 195 |     x86_movsd_to_reg, x86_movsd_from_reg,
 196 |     x86_movlps_to_reg, x86_movlps_from_reg,
 197 |     x86_movlpd_to_reg, x86_movlpd_from_reg,
 198 |     x86_movups_to_reg, x86_movups_from_reg,
 199 |     x86_movupd_to_reg, x86_movupd_from_reg,
 200 |     x86_haddps, x86_haddpd,
 201 |     x86_shufps, x86_shufpd,
 202 |     x86_cvtdq2ps, x86_cvttps2dq,
 203 |     -- ** Prefetch instructions
 204 |     x86_prefetch0_mem, x86_prefetch1_mem, x86_prefetch2_mem, x86_prefetchnta_mem,
 205 |     x86_prefetch0_membase, x86_prefetch1_membase, x86_prefetch2_membase, x86_prefetchnta_membase,
 206 |     x86_prefetch0_regp, x86_prefetch1_regp, x86_prefetch2_regp, x86_prefetchnta_regp,
 207 |     -- ** Miscellaneous
 208 |     x86_sahf, x86_wait, x86_nop, x86_breakpoint, x86_rdtsc, x86_cld,
 209 |     x86_prefix, x86_padding,
 210 |     -- ** Other utilities
 211 |     negateCC
 212 |                        ) where
 213 | 
 214 | import qualified Text.PrettyPrint.HughesPJ as PP
 215 | 
 216 | import Data.Word
 217 | import Data.Bits
 218 | 
 219 | import Foreign.Ptr
 220 | 
 221 | import Harpy.CodeGenMonad
 222 | 
 223 | -- | Maximal length of an x86 instruction in bytes.
 224 | x86_max_instruction_bytes :: Int
 225 | x86_max_instruction_bytes = 16   -- According to Intel manual.
 226 | 
 227 | x86_dword_size, x86_qword_size :: Int
 228 | 
 229 | x86_dword_size = 4                    -- Number of bytes in doubleword
 230 | x86_qword_size = 8                    -- Number of bytes in quadword
 231 | 
 232 | x86_eax, x86_ecx, x86_edx, x86_ebx, x86_esp, x86_ebp, x86_esi,
 233 |   x86_edi :: Word8
 234 | x86_eax = 0
 235 | x86_ecx = 1
 236 | x86_edx = 2
 237 | x86_ebx = 3
 238 | x86_esp = 4
 239 | x86_ebp = 5
 240 | x86_esi = 6
 241 | x86_edi = 7
 242 | 
 243 | x86_cmp, x86_or, x86_adc, x86_sbb, x86_and, x86_sub, x86_xor,
 244 |   x86_add :: Word8
 245 | x86_add = 0
 246 | x86_or  = 1
 247 | x86_adc = 2
 248 | x86_sbb = 3
 249 | x86_and = 4
 250 | x86_sub = 5
 251 | x86_xor = 6
 252 | x86_cmp = 7
 253 | 
 254 | x86_sar, x86_shld, x86_shlr, x86_rol, x86_ror, x86_rcl, x86_rcr,
 255 |   x86_shl, x86_shr :: Word8
 256 | 
 257 | x86_shld = 0
 258 | x86_shlr = 1
 259 | x86_rol  = 0
 260 | x86_ror  = 1
 261 | x86_rcl  = 2
 262 | x86_rcr  = 3
 263 | x86_shl  = 4
 264 | x86_shr  = 5
 265 | x86_sar  = 7
 266 | 
 267 | x86_fadd, x86_fmul, x86_fcom, x86_fcomp, x86_fsub, x86_fsubr :: Word8
 268 | x86_fdiv, x86_fdivr :: Word8
 269 | 
 270 | x86_fadd  = 0
 271 | x86_fmul  = 1
 272 | x86_fcom  = 2
 273 | x86_fcomp = 3
 274 | x86_fsub  = 4
 275 | x86_fsubr = 5
 276 | x86_fdiv  = 6
 277 | x86_fdivr = 7
 278 | 
 279 | x86_cc_no, x86_cc_eq, x86_cc_e, x86_cc_z, x86_cc_ne, x86_cc_nz, x86_cc_lt :: Int
 280 | x86_cc_b, x86_cc_c, x86_cc_nae, x86_cc_le, x86_cc_be, x86_cc_na :: Int
 281 | x86_cc_gt :: Int
 282 | x86_cc_a, x86_cc_nbe, x86_cc_ge, x86_cc_ae, x86_cc_nb, x86_cc_nc :: Int
 283 | x86_cc_lz, x86_cc_s, x86_cc_gez, x86_cc_ns, x86_cc_p, x86_cc_pe :: Int
 284 | x86_cc_np, x86_cc_po, x86_cc_o :: Int
 285 | x86_cc_eq  = 0
 286 | x86_cc_e   = 0
 287 | x86_cc_z   = 0
 288 | x86_cc_ne  = 1
 289 | x86_cc_nz  = 1
 290 | x86_cc_lt  = 2
 291 | x86_cc_b   = 2
 292 | x86_cc_c   = 2
 293 | x86_cc_nae = 2
 294 | x86_cc_le  = 3
 295 | x86_cc_be  = 3
 296 | x86_cc_na  = 3
 297 | x86_cc_gt  = 4
 298 | x86_cc_a   = 4
 299 | x86_cc_nbe = 4
 300 | x86_cc_ge  = 5
 301 | x86_cc_ae  = 5
 302 | x86_cc_nb  = 5
 303 | x86_cc_nc  = 5
 304 | x86_cc_lz  = 6
 305 | x86_cc_s   = 6
 306 | x86_cc_gez = 7
 307 | x86_cc_ns  = 7
 308 | x86_cc_p   = 8
 309 | x86_cc_pe  = 8
 310 | x86_cc_np  = 9
 311 | x86_cc_po  = 9
 312 | x86_cc_o   = 10
 313 | x86_cc_no  = 11
 314 | 
 315 | -- | FP status
 316 | x86_fp_c0, x86_fp_c1, x86_fp_c2, x86_fp_c3, x86_fp_cc_mask :: Word32
 317 | x86_fp_c0 = 0x100
 318 | x86_fp_c1 = 0x200
 319 | x86_fp_c2 = 0x400
 320 | x86_fp_c3 = 0x4000
 321 | x86_fp_cc_mask = 0x4500
 322 | 
 323 | -- | FP control word
 324 | x86_fpcw_invopex_mask, x86_fpcw_denopex_mask, x86_fpcw_zerodiv_mask,
 325 |  x86_fpcw_ovfex_mask, x86_fpcw_undfex_mask, x86_fpcw_precex_mask,
 326 |  x86_fpcw_precc_mask, x86_fpcw_roundc_mask :: Word32
 327 | 
 328 | x86_fpcw_invopex_mask = 0x1
 329 | x86_fpcw_denopex_mask = 0x2
 330 | x86_fpcw_zerodiv_mask = 0x4
 331 | x86_fpcw_ovfex_mask   = 0x8
 332 | x86_fpcw_undfex_mask  = 0x10
 333 | x86_fpcw_precex_mask  = 0x20
 334 | x86_fpcw_precc_mask   = 0x300
 335 | x86_fpcw_roundc_mask  = 0xc00
 336 | 
 337 | -- | Values for precision control
 338 | x86_fpcw_prec_single, x86_fpcw_prec_double,
 339 |  x86_fpcw_prec_extended :: Word32
 340 | x86_fpcw_prec_single    = 0
 341 | x86_fpcw_prec_double    = 0x200
 342 | x86_fpcw_prec_extended  = 0x300
 343 | 
 344 | -- | Values for rounding control
 345 | x86_fpcw_round_nearest, x86_fpcw_round_down, x86_fpcw_round_up,
 346 |  x86_fpcw_round_tozero :: Word32
 347 | x86_fpcw_round_nearest  = 0
 348 | x86_fpcw_round_down     = 0x400
 349 | x86_fpcw_round_up       = 0x800
 350 | x86_fpcw_round_tozero   = 0xc00
 351 | 
 352 | -- | Prefix codes
 353 | x86_lock_prefix, x86_repnz_prefix, x86_repz_prefix, x86_rep_prefix,
 354 |  x86_cs_prefix, x86_ss_prefix, x86_ds_prefix, x86_es_prefix,
 355 |  x86_fs_prefix, x86_gs_prefix, x86_unlikely_prefix,
 356 |  x86_likely_prefix, x86_operand_prefix, x86_address_prefix :: Word8
 357 | x86_lock_prefix = 0xf0
 358 | x86_repnz_prefix = 0xf2
 359 | x86_repz_prefix = 0xf3
 360 | x86_rep_prefix = 0xf3
 361 | x86_cs_prefix = 0x2e
 362 | x86_ss_prefix = 0x36
 363 | x86_ds_prefix = 0x3e
 364 | x86_es_prefix = 0x26
 365 | x86_fs_prefix = 0x64
 366 | x86_gs_prefix = 0x65
 367 | x86_unlikely_prefix = 0x2e
 368 | x86_likely_prefix = 0x3e
 369 | x86_operand_prefix = 0x66
 370 | x86_address_prefix = 0x67
 371 | 
 372 | -- | Mapping from condition code to opcode (unsigned)
 373 | x86_cc_unsigned_map :: [Word8]
 374 | x86_cc_unsigned_map = [
 375 |        0x74, -- eq
 376 |        0x75, -- ne
 377 |        0x72, -- lt
 378 |        0x76, -- le
 379 |        0x77, -- gt
 380 |        0x73, -- ge
 381 |        0x78, -- lz
 382 |        0x79, -- gez
 383 |        0x7a, -- p
 384 |        0x7b, -- np
 385 |        0x70, -- o
 386 |        0x71  -- no
 387 |  ]
 388 | 
 389 | -- | Mapping from condition code to opcode (signed)
 390 | x86_cc_signed_map :: [Word8]
 391 | x86_cc_signed_map = [
 392 |       0x74, -- eq
 393 |       0x75, -- ne
 394 |       0x7c, -- lt
 395 |       0x7e, -- le
 396 |       0x7f, -- gt
 397 |       0x7d, -- ge
 398 |       0x78, -- lz
 399 |       0x79, -- gez
 400 |       0x7a, -- p
 401 |       0x7b, -- np
 402 |       0x70, -- o
 403 |       0x71  -- no
 404 |  ]
 405 | 
 406 | -- | Mapping from condition code to negated condition code.
 407 | x86_cc_negate :: [(Int, Int)]
 408 | x86_cc_negate = [
 409 |        (x86_cc_eq, x86_cc_ne), -- eq
 410 |        (x86_cc_ne, x86_cc_eq), -- ne
 411 |        (x86_cc_lt, x86_cc_ge), -- lt
 412 |        (x86_cc_le, x86_cc_gt), -- le
 413 |        (x86_cc_gt, x86_cc_le), -- gt
 414 |        (x86_cc_ge, x86_cc_lt), -- ge
 415 |        (x86_cc_lz, x86_cc_gez), -- lz
 416 |        (x86_cc_gez, x86_cc_lz), -- gez
 417 |        (x86_cc_p, x86_cc_np), -- p
 418 |        (x86_cc_np, x86_cc_p), -- np
 419 |        (x86_cc_o, x86_cc_no), -- o
 420 |        (x86_cc_no, x86_cc_o)  -- no
 421 |  ]
 422 | 
 423 | -- | Invert a condition code.
 424 | negateCC :: Int -> Int
 425 | negateCC cc =
 426 |     case lookup cc x86_cc_negate of
 427 |       Just cc' -> cc'
 428 |       Nothing -> error ("unhandled case in negateCC" ++ show cc)
 429 | 
 430 | -- | Used to encode the fact that no base register is used in an
 431 | -- instruction.
 432 | x86_nobasereg :: Word8
 433 | x86_nobasereg = (-1)
 434 | 
 435 | x86_edi_mask, x86_esi_mask, x86_ebx_mask, x86_ebp_mask,
 436 |     x86_eax_mask, x86_ecx_mask, x86_edx_mask:: Int
 437 | x86_esi_mask = (1 `shiftL` (fromIntegral x86_esi))
 438 | x86_edi_mask = (1 `shiftL` (fromIntegral x86_edi))
 439 | x86_ebx_mask = (1 `shiftL` (fromIntegral x86_ebx))
 440 | x86_ebp_mask = (1 `shiftL` (fromIntegral x86_ebp))
 441 | x86_eax_mask = (1 `shiftL` (fromIntegral x86_eax))
 442 | x86_ecx_mask = (1 `shiftL` (fromIntegral x86_ecx))
 443 | x86_edx_mask = (1 `shiftL` (fromIntegral x86_edx))
 444 | 
 445 | -- | Bitvector mask for callee-saved registers
 446 | x86_callee_regs :: Int
 447 | x86_callee_regs = ((1 `shiftL` (fromIntegral x86_eax)) .|.
 448 |            (1 `shiftL` (fromIntegral x86_ecx)) .|.
 449 |            (1 `shiftL` (fromIntegral x86_edx)))
 450 | 
 451 | -- | Bitvector mask for caller-saved registers
 452 | x86_caller_regs :: Int
 453 | x86_caller_regs = ((1 `shiftL` (fromIntegral x86_ebx)) .|.
 454 |            (1 `shiftL` (fromIntegral x86_ebp)) .|.
 455 |            (1 `shiftL` (fromIntegral x86_esi)) .|.
 456 |            (1 `shiftL` (fromIntegral x86_edi)))
 457 | 
 458 | -- | Bitvector mask for byte-adressable registers
 459 | x86_byte_regs :: Int
 460 | x86_byte_regs =  ((1 `shiftL` (fromIntegral x86_eax)) .|.
 461 |           (1 `shiftL` (fromIntegral x86_ecx)) .|.
 462 |           (1 `shiftL` (fromIntegral x86_edx)) .|.
 463 |           (1 `shiftL` (fromIntegral x86_ebx)))
 464 | 
 465 | -- | Returns true when the given register is caller-saved.
 466 | x86_is_scratch :: Int -> Bool
 467 | x86_is_scratch reg = (x86_caller_regs .&. (1 `shiftL` (reg))) /= 0
 468 | 
 469 | -- | Returns true when the given register is caller-saved.
 470 | x86_is_callee :: Int -> Bool
 471 | 
 472 | x86_is_callee reg =  (x86_callee_regs .&. (1 `shiftL` (reg))) /= 0
 473 | 
 474 | -- | Returns true when the given register is byte-addressable.
 475 | x86_is_byte_reg :: (Num a, Ord a) => a -> Bool
 476 | x86_is_byte_reg reg = ((reg) < 4)
 477 | 
 478 | 
 479 | 
 480 | -- useful building blocks
 481 | 
 482 | 
 483 | --x86_modrm_mod modrm = ((modrm) `shiftR` 6)
 484 | --x86_modrm_reg :: Bits a => a -> a
 485 | --x86_modrm_reg modrm = (((modrm) `shiftR` 3) .&. 0x7)
 486 | --x86_modrm_rm modrm = ((modrm) .&. 0x7)
 487 | 
 488 | x86_address_byte :: Word8 -> Word8 -> Word8 -> CodeGen e s ()
 489 | x86_address_byte m o r = emit8 ((((m) .&. 0x03) `shiftL` 6) .|.
 490 |                                (((o) .&. 0x07) `shiftL` 3) .|.
 491 |                                 (((r) .&. 0x07)))
 492 | 
 493 | -- | Emit a 32-bit constant to the instruction stream.
 494 | x86_imm_emit32 :: Word32 -> CodeGen e s ()
 495 | x86_imm_emit32 imm = emit32 imm
 496 | 
 497 | -- -- | Emit a 32-bit constant to the instruction stream at the given offset.
 498 | -- x86_imm_emit32_at :: Int -> Word32 -> CodeGen e s ()
 499 | -- x86_imm_emit32_at pos imm = emit32At pos imm
 500 | 
 501 | -- | Emit a 16-bit constant to the instruction stream.
 502 | x86_imm_emit16 :: Word16 -> CodeGen e s ()
 503 | x86_imm_emit16 imm =
 504 |     let b0 = (imm .&. 0xff)
 505 |         b1 = ((imm `shiftR` 8) .&. 0xff)
 506 |     in do emit8 (fromIntegral b0)
 507 |           emit8 (fromIntegral b1)
 508 | 
 509 | -- | Emit a 8-bit constant to the instruction stream.
 510 | x86_imm_emit8 :: Word8 -> CodeGen e s ()
 511 | x86_imm_emit8 imm =
 512 |   emit8 (imm .&. 0xff)
 513 | 
 514 | -- -- | Emit a 8-bit constant to the instruction stream at the given offset.
 515 | -- x86_imm_emit8_at :: Int -> Word8 -> CodeGen e s ()
 516 | -- x86_imm_emit8_at pos imm = emit8At pos (imm .&. 0xff)
 517 | 
 518 | -- | Return true if the given value is a signed 8-bit constant.
 519 | x86_is_imm8 :: Integral a => a -> Bool
 520 | x86_is_imm8 imm =  (((fromIntegral imm :: Integer) >= -128) && ((fromIntegral imm :: Integer) <= 127))
 521 | -- x86_is_imm16 :: Integral a => a -> Bool
 522 | -- x86_is_imm16 imm = (((fromIntegral imm :: Integer) >= -(1 `shiftL` 16)) &&
 523 | --                               ((fromIntegral imm :: Integer) <= ((1 `shiftL` 16)-1)))
 524 | 
 525 | x86_reg_emit :: Word8 -> Word8 -> CodeGen e s ()
 526 | x86_reg_emit r regno = x86_address_byte 3 r regno
 527 | 
 528 | x86_reg8_emit :: Word8 -> Word8 -> Bool -> Bool -> CodeGen e s ()
 529 | x86_reg8_emit r regno is_rh is_rnoh =
 530 |   x86_address_byte 3 (if is_rh then (r .|. 4) else r)
 531 |                      (if is_rnoh then regno .|. 4 else regno)
 532 | 
 533 | -- | Emit a register-indirect address encoding.
 534 | x86_regp_emit :: Word8 -> Word8 -> CodeGen e s ()
 535 | x86_regp_emit r regno = x86_address_byte 0 r regno
 536 | 
 537 | -- | Emit a memory+displacement address encoding.
 538 | x86_mem_emit :: Word8 -> Word32 -> CodeGen e s ()
 539 | x86_mem_emit r disp = do x86_address_byte 0 r 5
 540 |                          x86_imm_emit32 disp
 541 | 
 542 | -- | Emit a mem+base address encoding
 543 | x86_membase_emit :: Word8 -> Word8 -> Word32 -> CodeGen e s ()
 544 | x86_membase_emit r basereg disp =
 545 |     if basereg == x86_esp
 546 |        then if disp == 0
 547 |                then do x86_address_byte 0 r x86_esp
 548 |                        x86_address_byte 0 x86_esp x86_esp
 549 |                else if x86_is_imm8 disp
 550 |                        then do x86_address_byte 1 r x86_esp
 551 |                                x86_address_byte 0 x86_esp x86_esp
 552 |                                x86_imm_emit8 (fromIntegral disp)
 553 |                        else do x86_address_byte 2 r x86_esp
 554 |                                x86_address_byte 0 x86_esp x86_esp
 555 |                                x86_imm_emit32 (fromIntegral disp)
 556 |        else do if (disp == 0 && (toInteger basereg) /= (toInteger x86_ebp))
 557 |                   then x86_address_byte 0 r basereg
 558 |                   else if x86_is_imm8 (fromIntegral disp :: Word32)
 559 |                           then do x86_address_byte 1 r basereg
 560 |                                   x86_imm_emit8 (fromIntegral disp)
 561 |                           else do x86_address_byte 2 r basereg
 562 |                                   x86_imm_emit32 (fromIntegral disp)
 563 | 
 564 | x86_memindex_emit :: Word8 -> Word8 -> Word32 -> Word8 -> Word8 -> CodeGen e s ()
 565 | x86_memindex_emit r basereg disp indexreg shft =
 566 |     if (basereg == x86_nobasereg)
 567 |        then do x86_address_byte 0 r 4
 568 |                x86_address_byte shft indexreg 5
 569 |                x86_imm_emit32 disp
 570 |        else if ((disp) == 0 && (basereg) /= x86_ebp)
 571 |                then do x86_address_byte 0 r 4
 572 |                        x86_address_byte shft indexreg (fromIntegral basereg)
 573 |                 else if x86_is_imm8 disp
 574 |                         then do x86_address_byte 1 r 4
 575 |                                 x86_address_byte shft indexreg
 576 |                                              (fromIntegral basereg)
 577 |                                 x86_imm_emit8 (fromIntegral disp)
 578 |                         else do x86_address_byte 2 r 4
 579 |                                 x86_address_byte shft indexreg 5
 580 |                                 x86_imm_emit32 disp
 581 | 
 582 | {-
 583 | x86_jmp_ofs_size ins =
 584 |   do instr <- peek8At ins
 585 |      case instr of
 586 |        0xe8 -> return 1
 587 |        0xe9 -> return 1
 588 |        0x0f ->
 589 |          do atPos <- peek8At (ins + 1)
 590 |             if (atPos < 0x70 || atPos > 0x8f)
 591 |                then failCodeGen (PP.text "Wrong Opcode")
 592 |                else return 1
 593 |        _ -> return 0
 594 | -}
 595 | 
 596 | -- target is the position in the code where to jump to:
 597 | 
 598 | -- target = code;
 599 | -- .. output loop code...
 600 | -- x86_mov_reg_imm (code, X86_EAX, 0);
 601 | -- loop = code;
 602 | -- x86_loop (code, -1);
 603 | -- ... finish method
 604 | 
 605 | -- patch displacement
 606 | 
 607 | -- x86_patch (loop, target);
 608 | 
 609 | -- ins should point at the start of the instruction that encodes a target.
 610 | -- the instruction is inspected for validity and the correct displacement
 611 | -- is inserted.
 612 | 
 613 | {-
 614 | x86_patch ins target =
 615 |     let pos = ins + 1
 616 |     in do size <- x86_jmp_ofs_size ins
 617 |           instr <- peek8At ins
 618 |           let disp = target - (if instr == 0x0f then pos + 1 else pos)
 619 |           if size == 1
 620 |              then x86_imm_emit32_at pos (fromIntegral (disp - 4))
 621 |              else if (x86_is_imm8 (disp - 1))
 622 |                      then x86_imm_emit8_at pos (fromIntegral (disp - 1))
 623 |                      else failCodeGen (PP.text "Wrong offset")
 624 | -}
 625 | 
 626 | x86_breakpoint, x86_cld, x86_stosb, x86_stosl, x86_stosd, x86_movsb,
 627 |  x86_movsl, x86_movsd :: CodeGen s e ()
 628 | x86_breakpoint = emit8 0xcc
 629 | x86_cld = emit8 0xfc
 630 | x86_stosb = emit8 0xaa
 631 | x86_stosl = emit8 0xab
 632 | x86_stosd = x86_stosl
 633 | x86_movsb = emit8 0xa4
 634 | x86_movsl = emit8 0xa5
 635 | x86_movsd = x86_movsl
 636 | 
 637 | x86_prefix :: Word8 -> CodeGen s e ()
 638 | x86_prefix p = emit8 p
 639 | 
 640 | x86_rdtsc :: CodeGen s e ()
 641 | x86_rdtsc = emit8 0x0f >> emit8 0x31
 642 | 
 643 | x86_cmpxchg_reg_reg :: Word8 -> Word8 -> CodeGen e s ()
 644 | x86_cmpxchg_reg_reg dreg reg =
 645 |     emit8 0x0f >> emit8 0xb1 >> x86_reg_emit reg dreg
 646 | 
 647 | x86_cmpxchg_mem_reg :: Word32 -> Word8 -> CodeGen e s ()
 648 | x86_cmpxchg_mem_reg mem reg = emit8 0x0f >> emit8 0xb1 >> x86_mem_emit reg mem
 649 | 
 650 | x86_cmpxchg_membase_reg :: Word8 -> Word32 -> Word8 -> CodeGen e s ()
 651 | x86_cmpxchg_membase_reg basereg disp reg =
 652 |     emit8 0x0f >> emit8 0xb1 >> x86_membase_emit reg basereg disp
 653 | 
 654 | x86_xchg :: (Eq a, Num a) => a -> CodeGen e s ()
 655 | x86_xchg size = if size == 1 then emit8 0x86 else emit8 0x87
 656 | 
 657 | x86_xchg_reg_reg dreg reg size =
 658 |     do x86_xchg size ; x86_reg_emit reg dreg
 659 | x86_xchg_mem_reg mem reg size =
 660 |     do x86_xchg size ; x86_mem_emit reg mem
 661 | x86_xchg_membase_reg basereg disp reg size =
 662 |     do x86_xchg size ; x86_membase_emit reg basereg disp
 663 | 
 664 | x86_xadd :: (Eq a, Num a) => a -> CodeGen e s ()
 665 | x86_xadd size = do emit8 0x0f ; if size == 1 then emit8 0xc0 else emit8 0xc1
 666 | x86_xadd_reg_reg dreg reg size = x86_xadd size >> x86_reg_emit reg dreg
 667 | x86_xadd_mem_reg mem reg size = x86_xadd size >> x86_mem_emit reg mem
 668 | x86_xadd_membase_reg basereg disp reg size =
 669 |     x86_xadd size >> x86_membase_emit reg basereg disp
 670 | 
 671 | x86_inc_mem mem = emit8 0xff >> x86_mem_emit 0 mem
 672 | x86_inc_membase basereg disp = emit8 0xff >> x86_membase_emit 0 basereg disp
 673 | x86_inc_reg reg = emit8 (0x40 + reg)
 674 | 
 675 | x86_dec_mem mem = emit8 0xff >> x86_mem_emit 1 mem
 676 | x86_dec_membase basereg disp = emit8 0xff >> x86_membase_emit 1 basereg disp
 677 | x86_dec_reg reg = emit8 (0x48 + reg)
 678 | 
 679 | x86_not_mem mem = emit8 0xf7 >> x86_mem_emit 2 mem
 680 | x86_not_membase basereg disp = emit8 0xf7 >> x86_membase_emit 2 basereg disp
 681 | x86_not_reg reg = emit8 0xf7 >> x86_reg_emit 2 reg
 682 | 
 683 | x86_neg_mem mem = emit8 0xf7 >> x86_mem_emit 3 mem
 684 | x86_neg_membase basereg disp = emit8 0xf7 >> x86_membase_emit 3 basereg disp
 685 | x86_neg_reg reg = emit8 0xf7 >> x86_reg_emit 3 reg
 686 | 
 687 | x86_nop :: CodeGen s e ()
 688 | x86_nop = emit8 0x90
 689 | 
 690 | x86_alu_reg_imm :: Word8 -> Word8 -> Int -> CodeGen e s ()
 691 | x86_alu_reg_imm opc reg imm =
 692 |     do if reg == x86_eax
 693 |           then emit8 (fromIntegral (((opc) `shiftL` 3) + 5)) >> x86_imm_emit32 (fromIntegral imm)
 694 |           else if x86_is_imm8 imm
 695 |                   then do emit8 0x83
 696 |                           x86_reg_emit (fromIntegral opc) (fromIntegral reg)
 697 |                           x86_imm_emit8 (fromIntegral imm)
 698 |                   else do emit8 0x81
 699 |                           x86_reg_emit (fromIntegral opc) (fromIntegral reg)
 700 |                           x86_imm_emit32 (fromIntegral imm)
 701 | 
 702 | 
 703 | x86_alu_mem_imm opc mem imm =
 704 |     if x86_is_imm8 imm
 705 |        then do emit8 0x83
 706 |                x86_mem_emit opc mem
 707 |                x86_imm_emit8 (fromIntegral imm)
 708 |        else do emit8 0x81
 709 |                x86_mem_emit opc mem
 710 |                x86_imm_emit32 imm
 711 | 
 712 | 
 713 | x86_alu_membase_imm opc basereg disp imm =
 714 |     if x86_is_imm8 imm
 715 |        then do emit8 0x83
 716 |                x86_membase_emit opc basereg disp
 717 |                x86_imm_emit8 (fromIntegral imm)
 718 |        else do emit8 0x81
 719 |                x86_membase_emit opc basereg disp
 720 |                x86_imm_emit32 imm
 721 | x86_alu_membase8_imm opc basereg disp imm =
 722 |     do emit8 0x80
 723 |        x86_membase_emit opc basereg disp
 724 |        x86_imm_emit8 imm
 725 | x86_alu_mem_reg opc mem reg =
 726 |         do emit8 ((opc `shiftL` 3) + 1)
 727 |            x86_mem_emit reg mem
 728 | x86_alu_membase_reg opc basereg disp reg =
 729 |     do emit8 ((opc `shiftL` 3) + 1)
 730 |        x86_membase_emit reg basereg disp
 731 | x86_alu_reg_reg opc dreg reg =
 732 |     do emit8 ((opc `shiftL` 3) + 3)
 733 |        x86_reg_emit dreg reg
 734 | 
 735 | -- @x86_alu_reg8_reg8:
 736 | -- Supports ALU operations between two 8-bit registers.
 737 | -- dreg := dreg opc reg
 738 | -- X86_Reg_No enum is used to specify the registers.
 739 | -- Additionally is_*_h flags are used to specify what part
 740 | -- of a given 32-bit register is used - high (TRUE) or low (FALSE).
 741 | -- For example: dreg = X86_EAX, is_dreg_h = TRUE -> use AH
 742 | 
 743 | x86_alu_reg8_reg8 opc dreg reg is_dreg_h is_reg_h =
 744 |     do emit8 ((opc `shiftL` 3) + 2)
 745 |        x86_reg8_emit dreg reg is_dreg_h is_reg_h
 746 | x86_alu_reg_mem opc reg mem =
 747 |     do emit8 ((opc `shiftL` 3) + 3)
 748 |        x86_mem_emit reg mem
 749 | x86_alu_reg_membase opc reg basereg disp =
 750 |     do emit8 ((opc `shiftL` 3) + 3)
 751 |        x86_membase_emit reg basereg disp
 752 | 
 753 | x86_test_reg_imm reg imm =
 754 |     do if reg == x86_eax
 755 |           then emit8 0xa9
 756 |           else do emit8 0xf7 ; x86_reg_emit 0 (fromIntegral reg)
 757 |        x86_imm_emit32 imm
 758 | x86_test_mem_imm mem imm =
 759 |     do emit8 0xf7 ; x86_mem_emit 0 mem ; x86_imm_emit32 imm
 760 | x86_test_membase_imm basereg disp imm =
 761 |     do emit8 0xf7 ; x86_membase_emit 0 basereg disp ; x86_imm_emit32 imm
 762 | x86_test_reg_reg dreg reg = do emit8 0x85 ; x86_reg_emit reg dreg
 763 | x86_test_mem_reg mem reg =
 764 |     do emit8 0x85 ; x86_mem_emit reg mem
 765 | x86_test_membase_reg basereg disp reg =
 766 |     do emit8 0x85 ; x86_membase_emit reg basereg disp
 767 | 
 768 | x86_shift_reg_imm opc reg imm =
 769 |     if imm == 1
 770 |        then do emit8 0xd1 ; x86_reg_emit opc reg
 771 |        else do emit8 0xc1
 772 |                x86_reg_emit opc reg
 773 |                x86_imm_emit8 imm
 774 | x86_shift_mem_imm opc mem imm =
 775 |     if imm == 1
 776 |        then do emit8 0xd1 ; x86_mem_emit opc mem
 777 |        else do emit8 0xc1
 778 |                x86_mem_emit opc mem
 779 |                x86_imm_emit8 imm
 780 | x86_shift_membase_imm opc basereg disp imm =
 781 |     if imm == 1
 782 |        then do emit8 0xd1 ; x86_membase_emit opc basereg disp
 783 |        else do emit8 0xc1
 784 |                x86_membase_emit opc basereg disp
 785 |                x86_imm_emit8 imm
 786 | x86_shift_reg opc reg =
 787 |     emit8 0xd3 >> x86_reg_emit opc reg
 788 | x86_shift_mem opc mem =
 789 |     emit8 0xd3 >> x86_mem_emit opc mem
 790 | x86_shift_membase opc basereg disp =
 791 |     emit8 0xd3 >> x86_membase_emit opc basereg disp
 792 | 
 793 | -- Multi op shift missing.
 794 | 
 795 | x86_shrd_reg dreg reg =
 796 |     emit8 0x0f >> emit8 0xad >> x86_reg_emit reg dreg
 797 | x86_shrd_reg_imm dreg reg shamt =
 798 |     emit8 0x0f >> emit8 0xac >> x86_reg_emit reg dreg >> x86_imm_emit8 shamt
 799 | x86_shld_reg dreg reg =
 800 |     emit8 0x0f >> emit8 0xa5 >> x86_reg_emit reg dreg
 801 | x86_shld_reg_imm dreg reg shamt =
 802 |     emit8 0x0f >> emit8 0xa4 >> x86_reg_emit reg dreg >>x86_imm_emit8 shamt
 803 | 
 804 | -- EDX:EAX = EAX * rm
 805 | 
 806 | x86_mul_reg :: Word8 -> Bool -> CodeGen e s ()
 807 | x86_mul_reg reg is_signed =
 808 |     emit8 0xf7 >> x86_reg_emit (4 + (if is_signed then 1 else 0)) reg
 809 | 
 810 | x86_mul_mem :: Word32 -> Bool -> CodeGen e s ()
 811 | x86_mul_mem mem is_signed =
 812 |     emit8 0xf7 >> x86_mem_emit (4 + (if is_signed then 1 else 0)) mem
 813 | 
 814 | x86_mul_membase :: Word8 -> Word32 -> Bool -> CodeGen e s ()
 815 | x86_mul_membase basereg disp is_signed =
 816 |     do emit8 0xf7
 817 |        x86_membase_emit (4 + (if is_signed then 1 else 0)) basereg disp
 818 | 
 819 | -- r *= rm
 820 | 
 821 | x86_imul_reg_reg :: Word8 -> Word8 -> CodeGen e s ()
 822 | x86_imul_reg_reg dreg reg =
 823 |     emit8 0x0f >> emit8 0xaf >> x86_reg_emit dreg reg
 824 | 
 825 | x86_imul_reg_mem :: Word8 -> Word32 -> CodeGen e s ()
 826 | x86_imul_reg_mem reg mem =
 827 |     emit8 0x0f >> emit8 0xaf >> x86_mem_emit reg mem
 828 | 
 829 | x86_imul_reg_membase :: Word8 -> Word8 -> Word32 -> CodeGen e s ()
 830 | x86_imul_reg_membase reg basereg disp =
 831 |     emit8 0x0f >> emit8 0xaf >> x86_membase_emit reg basereg disp
 832 | 
 833 | -- dreg = rm * imm
 834 | 
 835 | x86_imul_reg_reg_imm :: Word8 -> Word8 -> Word32 -> CodeGen e s ()
 836 | x86_imul_reg_reg_imm dreg reg imm =
 837 |     if x86_is_imm8 imm
 838 |        then emit8 0x6b >> x86_reg_emit dreg reg >>
 839 |               x86_imm_emit8 (fromIntegral imm)
 840 |        else emit8 0x69 >> x86_reg_emit dreg reg >> x86_imm_emit32 imm
 841 | 
 842 | x86_imul_reg_mem_imm :: Word8 -> Word32 -> Word32 -> CodeGen e s ()
 843 | x86_imul_reg_mem_imm reg mem imm =
 844 |     if x86_is_imm8 imm
 845 |        then emit8 0x6b >> x86_mem_emit reg mem >>
 846 |               x86_imm_emit8 (fromIntegral imm)
 847 |        else emit8 0x69 >> x86_reg_emit reg (fromIntegral mem) >>
 848 |               x86_imm_emit32 imm
 849 | 
 850 | x86_imul_reg_membase_imm :: Word8 -> Word8 -> Word32 -> Word32 -> CodeGen e s ()
 851 | x86_imul_reg_membase_imm reg basereg disp imm =
 852 |     if x86_is_imm8 imm
 853 |        then emit8 0x6b >> x86_membase_emit reg basereg disp >>
 854 |             x86_imm_emit8 (fromIntegral imm)
 855 |        else do emit8 0x69
 856 |                x86_membase_emit reg basereg disp
 857 |                x86_imm_emit32 imm
 858 | 
 859 | -- divide EDX:EAX by rm;
 860 | -- eax = quotient, edx = remainder
 861 | 
 862 | x86_div_reg :: Word8 -> Bool -> CodeGen e s ()
 863 | x86_div_reg reg is_signed =
 864 |     emit8 0xf7 >> x86_reg_emit (6 + (if is_signed then 1 else 0)) reg
 865 | x86_div_mem :: Word32 -> Bool -> CodeGen e s ()
 866 | x86_div_mem mem is_signed =
 867 |     emit8 0xf7 >> x86_mem_emit (6 + (if is_signed then 1 else 0)) mem
 868 | 
 869 | x86_div_membase :: Word8 -> Word32 -> Bool -> CodeGen e s ()
 870 | x86_div_membase basereg disp is_signed =
 871 |     do emit8 0xf7
 872 |        x86_membase_emit (6 + (if is_signed then 1 else 0)) basereg disp
 873 | 
 874 | x86_mov1 :: (Eq t, Num t) => t -> CodeGen e s ()
 875 | x86_mov1 size =
 876 |     case size of
 877 |          1 -> emit8 0x88
 878 |          2 -> emit8 0x66 >> emit8 0x89
 879 |          4 -> emit8 0x89
 880 |          _ -> failCodeGen (PP.text "invalid operand size")
 881 | 
 882 | x86_mov2 :: (Eq t, Num t) => t -> CodeGen e s ()
 883 | x86_mov2 size =
 884 |     case size of
 885 |          1 -> emit8 0x8a
 886 |          2 -> emit8 0x66 >> emit8 0x8b
 887 |          4 -> emit8 0x8b
 888 |          _ -> failCodeGen (PP.text "invalid operand size")
 889 | 
 890 | x86_mov_mem_reg :: (Eq t, Num t) => Word32 -> Word8 -> t -> CodeGen e s ()
 891 | x86_mov_mem_reg mem reg size =
 892 |     do x86_mov1 size ; x86_mem_emit reg mem
 893 | 
 894 | x86_mov_regp_reg :: (Eq t, Num t) => Word8 -> Word8 -> t -> CodeGen e s ()
 895 | x86_mov_regp_reg regp reg size =
 896 |     do x86_mov1 size ; x86_regp_emit reg regp
 897 | 
 898 | x86_mov_reg_regp :: (Eq t, Num t) => Word8 -> Word8 -> t -> CodeGen e s ()
 899 | x86_mov_reg_regp reg regp size =
 900 |     do x86_mov2 size ; x86_regp_emit reg regp
 901 | 
 902 | x86_mov_membase_reg :: (Eq t, Num t) => Word8 -> Word32 -> Word8 -> t -> CodeGen e s ()
 903 | x86_mov_membase_reg basereg disp reg size =
 904 |     do x86_mov1 size ; x86_membase_emit reg basereg disp
 905 | 
 906 | x86_mov_memindex_reg :: (Eq t, Num t) => Word8 -> Word32 -> Word8 -> Word8 -> Word8 -> t -> CodeGen e s ()
 907 | x86_mov_memindex_reg basereg disp indexreg shft reg size =
 908 |     do x86_mov1 size ; x86_memindex_emit reg basereg disp indexreg shft
 909 | 
 910 | x86_mov_reg_reg :: (Eq t, Num t) => Word8 -> Word8 -> t -> CodeGen e s ()
 911 | x86_mov_reg_reg dreg reg size =
 912 |     do x86_mov2 size
 913 |        x86_reg_emit dreg reg
 914 | 
 915 | x86_mov_reg_mem :: (Eq t, Num t) => Word8 -> Word32 -> t -> CodeGen e s ()
 916 | x86_mov_reg_mem reg mem size =
 917 |     do x86_mov2 size
 918 |        x86_mem_emit reg mem
 919 | 
 920 | x86_mov_reg_membase :: (Eq t, Num t) => Word8 -> Word8 -> Word32 -> t -> CodeGen e s ()
 921 | x86_mov_reg_membase reg basereg disp size =
 922 |     do x86_mov2 size
 923 |        x86_membase_emit reg basereg disp
 924 | 
 925 | x86_mov_reg_memindex :: (Eq t, Num t) => Word8 -> Word8 -> Word32 -> Word8 -> Word8 -> t -> CodeGen e s ()
 926 | x86_mov_reg_memindex _ _ _ 4 _ _ =
 927 |     failCodeGen $ PP.text "x86_mov_reg_memindex: cannot use (E)SP as index register"
 928 | x86_mov_reg_memindex reg basereg disp indexreg shft size =
 929 |     do x86_mov2 size
 930 |        x86_memindex_emit reg basereg disp indexreg  shft
 931 | 
 932 | x86_mov_reg_imm :: Word8 -> Word32 -> CodeGen e s ()
 933 | x86_mov_reg_imm reg imm =
 934 |     emit8 (0xb8 + reg) >> x86_imm_emit32 imm
 935 | 
 936 | x86_mov_mem_imm :: (Eq a, Num a) => Word32 -> Word32 -> a -> CodeGen e s ()
 937 | x86_mov_mem_imm mem imm size =
 938 |     if size == 1
 939 |        then do emit8 0xc6;
 940 |                x86_mem_emit 0 mem
 941 |                x86_imm_emit8 (fromIntegral imm)
 942 |        else if size == 2
 943 |                then do emit8 0x66
 944 |                        emit8 0xc7
 945 |                        x86_mem_emit 0 mem
 946 |                        x86_imm_emit16 (fromIntegral imm)
 947 |                else do emit8 0xc7
 948 |                        x86_mem_emit 0 mem
 949 |                        x86_imm_emit32 imm
 950 | 
 951 | x86_mov_membase_imm :: (Eq a, Num a) => Word8 -> Word32 -> Word32 -> a -> CodeGen e s ()
 952 | x86_mov_membase_imm basereg disp imm size =
 953 |     if size == 1
 954 |        then do emit8 0xc6
 955 |                x86_membase_emit 0 basereg disp
 956 |                x86_imm_emit8 (fromIntegral imm)
 957 |        else if size == 2
 958 |                then do emit8 0x66
 959 |                        emit8 0xc7
 960 |                        x86_membase_emit 0 basereg disp
 961 |                        x86_imm_emit16 (fromIntegral imm)
 962 |                else do emit8 0xc7
 963 |                        x86_membase_emit 0 basereg disp
 964 |                        x86_imm_emit32 imm
 965 | 
 966 | x86_mov_memindex_imm :: (Eq a, Num a) => Word8 -> Word32 -> Word8 -> Word8 -> Word32 -> a -> CodeGen e s ()
 967 | x86_mov_memindex_imm basereg disp indexreg shft imm size =
 968 |     if size == 1
 969 |     then do emit8 0xc6
 970 |             x86_memindex_emit 0 basereg disp indexreg  shft
 971 |             x86_imm_emit8 (fromIntegral imm)
 972 |     else if size == 2
 973 |          then do emit8 0x66
 974 |                  emit8 0xc7
 975 |                  x86_memindex_emit 0 basereg disp indexreg  shft
 976 |                  x86_imm_emit16 (fromIntegral imm)
 977 |          else do emit8 0xc7
 978 |                  x86_memindex_emit 0 basereg disp indexreg  shft
 979 |                  x86_imm_emit32 imm
 980 | 
 981 | -- LEA: Load Effective Address
 982 | 
 983 | x86_lea_mem :: Word8 -> Word32 -> CodeGen e s ()
 984 | x86_lea_mem reg mem = emit8 0x8d >> x86_mem_emit reg mem
 985 | 
 986 | x86_lea_membase :: Word8 -> Word8 -> Word32 -> CodeGen e s ()
 987 | x86_lea_membase reg basereg disp =
 988 |     emit8 0x8d >> x86_membase_emit reg basereg disp
 989 | 
 990 | x86_lea_memindex :: Word8 -> Word8 -> Word32 -> Word8 -> Word8 -> CodeGen e s ()
 991 | x86_lea_memindex reg basereg disp indexreg shft =
 992 |     emit8 0x8d >> x86_memindex_emit reg basereg disp indexreg shft
 993 | 
 994 | x86_widen_reg :: Word8 -> Word8 -> Bool -> Bool -> CodeGen e s ()
 995 | x86_widen_reg dreg reg is_signed is_half =
 996 |     if is_half || x86_is_byte_reg reg
 997 |     then do let op = 0xb6 + (if is_signed then 0x08 else 0) +
 998 |                      (if is_half then 0x1 else 0)
 999 |             emit8 0x0f
1000 |             emit8 op
1001 |             x86_reg_emit dreg reg
1002 |     else failCodeGen (PP.text "widen: need byte register or is_half=True")
1003 | 
1004 | x86_widen_mem :: Word8 -> Word32 -> Bool -> Bool -> CodeGen e s ()
1005 | x86_widen_mem dreg mem is_signed is_half =
1006 |     do let op = 0xb6 + (if is_signed then 0x08 else 0) +
1007 |                 (if is_half then 0x1 else 0)
1008 |        emit8 0x0f
1009 |        emit8 op
1010 |        x86_mem_emit dreg mem
1011 | 
1012 | x86_widen_membase :: Word8 -> Word8 -> Word32 -> Bool -> Bool -> CodeGen e s ()
1013 | x86_widen_membase dreg basereg disp is_signed is_half =
1014 |     do let op = 0xb6 + (if is_signed then 0x08 else 0) +
1015 |                 (if is_half then 0x1 else 0)
1016 |        emit8 0x0f
1017 |        emit8 op
1018 |        x86_membase_emit dreg basereg disp
1019 | 
1020 | x86_widen_memindex :: Word8 -> Word8 -> Word32 -> Word8 -> Word8 -> Bool -> Bool -> CodeGen e s ()
1021 | x86_widen_memindex dreg basereg disp indexreg shft is_signed is_half =
1022 |     do let op = 0xb6 + (if is_signed then 0x08 else 0) +
1023 |                 (if is_half then 0x1 else 0)
1024 |        emit8 0x0f
1025 |        emit8 op
1026 |        x86_memindex_emit dreg basereg disp indexreg shft
1027 | 
1028 | x86_cdq, x86_wait :: CodeGen s e ()
1029 | x86_cdq  = emit8 0x99
1030 | x86_wait = emit8 0x9b
1031 | 
1032 | x86_fp_op_mem :: Word8 -> Word32 -> Bool -> CodeGen e s ()
1033 | x86_fp_op_mem opc mem is_double =
1034 |     do emit8 (if is_double then 0xdc else 0xd8)
1035 |        x86_mem_emit opc mem
1036 | x86_fp_op_membase :: Word8 -> Word8 -> Word32 -> Bool -> CodeGen e s ()
1037 | x86_fp_op_membase opc basereg disp is_double =
1038 |     do emit8 (if is_double then 0xdc else 0xd8)
1039 |        x86_membase_emit opc basereg disp
1040 | x86_fp_op ::Word8 -> Word8 -> CodeGen e s ()
1041 | x86_fp_op opc index =
1042 |     do emit8 0xd8
1043 |        emit8 (0xc0 + (opc `shiftL` 3) + (index .&. 0x07))
1044 | x86_fp_op_reg :: Word8 -> Word8 -> Bool -> CodeGen e s ()
1045 | x86_fp_op_reg opc index pop_stack =
1046 |     do let  opcMap = [ 0, 1, 2, 3, 5, 4, 7, 6, 8]
1047 |        emit8 (if pop_stack then 0xde else 0xdc)
1048 |        emit8 (0xc0 + ((opcMap !! fromIntegral opc) `shiftL` 3) + (index .&. 0x07))
1049 | 
1050 | 
1051 | -- @x86_fp_int_op_membase
1052 | -- Supports FPU operations between ST(0) and integer operand in memory.
1053 | -- Operation encoded using X86_FP_Opcode enum.
1054 | -- Operand is addressed by [basereg + disp].
1055 | -- is_int specifies whether operand is int32 (TRUE) or int16 (FALSE).
1056 | 
1057 | x86_fp_int_op_membase :: Word8 -> Word8 -> Word32 -> Bool -> CodeGen e s ()
1058 | x86_fp_int_op_membase opc basereg disp is_int =
1059 |     do emit8 (if is_int then 0xda else 0xde)
1060 |        x86_membase_emit opc basereg disp
1061 | x86_fstp :: Word8 -> CodeGen e s ()
1062 | x86_fstp index =
1063 |     emit8 0xdd >> emit8 (0xd8 + index)
1064 | x86_fcompp :: CodeGen e s ()
1065 | x86_fcompp = emit8 0xde >> emit8 0xd9
1066 | x86_fucompp :: CodeGen e s ()
1067 | x86_fucompp = emit8 0xda >> emit8 0xe9
1068 | x86_fnstsw :: CodeGen e s ()
1069 | x86_fnstsw = emit8 0xdf >> emit8 0xe0
1070 | x86_fnstcw :: Word32 -> CodeGen e s ()
1071 | x86_fnstcw mem = emit8 0xd9 >> x86_mem_emit 7 mem
1072 | x86_fnstcw_membase :: Word8 -> Word32 -> CodeGen e s ()
1073 | x86_fnstcw_membase basereg disp =
1074 |     emit8 0xd9 >> x86_membase_emit 7 basereg disp
1075 | x86_fldcw :: Word32 -> CodeGen e s ()
1076 | x86_fldcw mem = emit8 0xd9 >> x86_mem_emit 5 mem
1077 | x86_fldcw_membase :: Word8 -> Word32 -> CodeGen e s ()
1078 | x86_fldcw_membase basereg disp =
1079 |     emit8 0xd9 >> x86_membase_emit 5 basereg disp
1080 | x86_fchs :: CodeGen e s ()
1081 | x86_fchs = emit8 0xd9 >> emit8 0xe0
1082 | x86_frem :: CodeGen e s ()
1083 | x86_frem = emit8 0xd9 >> emit8 0xf8
1084 | x86_fxch :: Word8 -> CodeGen e s ()
1085 | x86_fxch index = emit8 0xd9 >> emit8 (0xc8 + (index .&. 0x07))
1086 | x86_fcomi :: Word8 -> CodeGen e s ()
1087 | x86_fcomi index = emit8 0xdb >> emit8 (0xf0 + (index .&. 0x07))
1088 | x86_fcomip :: Word8 -> CodeGen e s ()
1089 | x86_fcomip index = emit8 0xdf >> emit8 (0xf0 + (index .&. 0x07))
1090 | x86_fucomi :: Word8 -> CodeGen e s ()
1091 | x86_fucomi index = emit8 0xdb >> emit8 (0xe8 + (index .&. 0x07))
1092 | x86_fucomip :: Word8 -> CodeGen e s ()
1093 | x86_fucomip index = emit8 0xdf >> emit8 (0xe8 + (index .&. 0x07))
1094 | 
1095 | data FIntSize = FInt16 | FInt32 | FInt64
1096 | 
1097 | x86_fld :: Word32 -> Bool -> CodeGen e s ()
1098 | x86_fld mem is_double =
1099 |     do emit8 (if is_double then 0xdd else 0xd9)
1100 |        x86_mem_emit 0 mem
1101 | x86_fld_membase :: Word8 -> Word32 -> Bool -> CodeGen e s ()
1102 | x86_fld_membase basereg disp is_double =
1103 |     do emit8 (if is_double then 0xdd else 0xd9)
1104 |        x86_membase_emit 0 basereg disp
1105 | x86_fld80_mem :: Word32 -> CodeGen e s ()
1106 | x86_fld80_mem mem = emit8 0xdb >> x86_mem_emit 5 mem
1107 | x86_fld80_membase :: Word8 -> Word32 -> CodeGen e s ()
1108 | x86_fld80_membase basereg disp =
1109 |     emit8 0xdb >> x86_membase_emit 5 basereg disp
1110 | x86_fild :: Word32 -> FIntSize -> CodeGen e s ()
1111 | x86_fild mem size =
1112 |     case size of
1113 |        FInt16 -> emit8 0xdf >> x86_mem_emit 0 mem
1114 |        FInt32 -> emit8 0xdb >> x86_mem_emit 0 mem
1115 |        FInt64 -> emit8 0xdf >> x86_mem_emit 5 mem
1116 | x86_fild_membase :: Word8 -> Word32 -> FIntSize -> CodeGen e s ()
1117 | x86_fild_membase basereg disp size =
1118 |     case size of
1119 |        FInt16 -> emit8 0xdb >> x86_membase_emit 0 basereg disp
1120 |        FInt32 -> emit8 0xdb >> x86_membase_emit 0 basereg disp
1121 |        FInt64 -> emit8 0xdf >> x86_membase_emit 5 basereg disp
1122 | x86_fld_reg :: Word8 -> CodeGen e s ()
1123 | x86_fld_reg index =
1124 |     emit8 0xd9 >> emit8 (0xc0 + (index .&. 0x07))
1125 | x86_fldz :: CodeGen e s ()
1126 | x86_fldz = emit8 0xd9 >> emit8 0xee
1127 | x86_fld1 :: CodeGen e s ()
1128 | x86_fld1 = emit8 0xd9 >> emit8 0xe8
1129 | x86_fldpi :: CodeGen e s ()
1130 | x86_fldpi = emit8 0xd9 >> emit8 0xeb
1131 | 
1132 | x86_fst :: Word32 -> Bool -> Bool -> CodeGen e s ()
1133 | x86_fst mem is_double pop_stack =
1134 |     do emit8 (if is_double then 0xdd else 0xd9)
1135 |        x86_mem_emit (2 + (if pop_stack then 1 else 0)) mem
1136 | x86_fst_membase :: Word8 -> Word32 -> Bool -> Bool -> CodeGen e s ()
1137 | x86_fst_membase basereg disp is_double pop_stack =
1138 |     do emit8 (if is_double then 0xdd else 0xd9)
1139 |        x86_membase_emit (2 + (if pop_stack then 1 else 0)) basereg disp
1140 | x86_fst80_mem :: Word32 -> CodeGen e s ()
1141 | x86_fst80_mem mem = emit8 0xdb >> x86_mem_emit 7 mem
1142 | x86_fst80_membase :: Word8 -> Word32 -> CodeGen e s ()
1143 | x86_fst80_membase basereg disp =
1144 |     emit8 0xdb >> x86_membase_emit 7 basereg disp
1145 | x86_fist_pop :: Word32 -> FIntSize -> CodeGen e s ()
1146 | x86_fist_pop mem size =
1147 |     case size of
1148 |        FInt16 -> emit8 0xdf >> x86_mem_emit 3 mem
1149 |        FInt32 -> emit8 0xdb >> x86_mem_emit 3 mem
1150 |        FInt64 -> emit8 0xdf >> x86_mem_emit 7 mem
1151 | x86_fist_pop_membase :: Word8 -> Word32 -> FIntSize -> CodeGen e s ()
1152 | x86_fist_pop_membase basereg disp size =
1153 |     case size of
1154 |        FInt16 -> emit8 0xdf >> x86_membase_emit 3 basereg disp
1155 |        FInt32 -> emit8 0xdb >> x86_membase_emit 3 basereg disp
1156 |        FInt64 -> emit8 0xdf >> x86_membase_emit 7 basereg disp
1157 | x86_fstsw :: CodeGen e s ()
1158 | x86_fstsw = emit8 0x9b >> emit8 0xdf >> emit8 0xe0
1159 | 
1160 | -- @x86_fist_membase
1161 | -- Converts content of ST(0) to integer and stores it at memory location
1162 | -- addressed by [basereg + disp].
1163 | -- size specifies whether destination is int32 or int16.
1164 | 
1165 | x86_fist_membase :: Word8 -> Word32 -> FIntSize -> CodeGen e s ()
1166 | x86_fist_membase basereg disp size =
1167 |     case size of
1168 |        FInt16 -> emit8 0xdf >> x86_membase_emit 2 basereg disp
1169 |        FInt32 -> emit8 0xdb >> x86_membase_emit 2 basereg disp
1170 |        FInt64 -> error "fist does not support 64 bit access"
1171 | 
1172 | x86_fincstp :: CodeGen e s ()
1173 | x86_fincstp = emit8 0xd9 >> emit8 0xf7
1174 | 
1175 | x86_fdecstp :: CodeGen e s ()
1176 | x86_fdecstp = emit8 0xd9 >> emit8 0xf6
1177 | 
1178 | -- PUSH instruction.
1179 | 
1180 | x86_push_reg :: Word8 -> CodeGen e s ()
1181 | x86_push_reg reg = emit8 (0x50 + reg)
1182 | 
1183 | x86_push_regp :: Word8 -> CodeGen e s ()
1184 | x86_push_regp reg = emit8 0xff >> x86_regp_emit 6 reg
1185 | 
1186 | x86_push_mem :: Word32 -> CodeGen e s ()
1187 | x86_push_mem mem = emit8 0xff >> x86_mem_emit 6 mem
1188 | 
1189 | x86_push_membase :: Word8 -> Word32 -> CodeGen e s ()
1190 | x86_push_membase basereg disp =
1191 |     emit8 0xff >> x86_membase_emit 6 basereg disp
1192 | 
1193 | x86_push_memindex :: Word8 -> Word32 -> Word8 -> Word8 -> CodeGen e s ()
1194 | x86_push_memindex basereg disp indexreg shft =
1195 |     emit8 0xff >> x86_memindex_emit 6 basereg disp indexreg shft
1196 | 
1197 | x86_push_imm_template :: CodeGen e s ()
1198 | x86_push_imm_template = x86_push_imm 0xf0f0f0f0
1199 | 
1200 | x86_push_imm :: Word32 -> CodeGen e s ()
1201 | x86_push_imm imm =
1202 |     if x86_is_imm8 imm
1203 |     then emit8 0x6A >> x86_imm_emit8 (fromIntegral imm)
1204 |     else emit8 0x68 >> x86_imm_emit32 imm
1205 | 
1206 | -- POP instruction.
1207 | 
1208 | x86_pop_reg :: Word8 -> CodeGen e s ()
1209 | x86_pop_reg reg = emit8 (0x58 + reg)
1210 | 
1211 | x86_pop_mem :: Word32 -> CodeGen e s ()
1212 | x86_pop_mem mem = emit8 0x87 >> x86_mem_emit 0 mem
1213 | 
1214 | x86_pop_membase :: Word8 -> Word32 -> CodeGen e s ()
1215 | x86_pop_membase basereg disp =
1216 |     emit8 0x87 >> x86_membase_emit 0 basereg disp
1217 | 
1218 | x86_pushad :: CodeGen e s ()
1219 | x86_pushad = emit8 0x60
1220 | 
1221 | x86_pushfd :: CodeGen e s ()
1222 | x86_pushfd = emit8 0x9c
1223 | 
1224 | x86_popad :: CodeGen e s ()
1225 | x86_popad  = emit8 0x61
1226 | 
1227 | x86_popfd :: CodeGen e s ()
1228 | x86_popfd  = emit8 0x9d
1229 | 
1230 | x86_loop ::  Word8 -> CodeGen e s ()
1231 | x86_loop imm = emit8 0xe2 >> x86_imm_emit8 imm
1232 | 
1233 | x86_loope :: Word8 -> CodeGen e s ()
1234 | x86_loope imm = emit8 0xe1 >> x86_imm_emit8 imm
1235 | 
1236 | x86_loopne :: Word8 -> CodeGen e s ()
1237 | x86_loopne imm = emit8 0xe0 >> x86_imm_emit8 imm
1238 | 
1239 | x86_jump32 :: Word32 -> CodeGen e s ()
1240 | x86_jump32 imm = emit8 0xe9 >> x86_imm_emit32 imm
1241 | 
1242 | x86_jump8 :: Word8 -> CodeGen e s ()
1243 | x86_jump8 imm = emit8 0xeb >> x86_imm_emit8 imm
1244 | 
1245 | x86_jump_reg :: Word8 -> CodeGen e s ()
1246 | x86_jump_reg reg = emit8 0xff >> x86_reg_emit 4 reg
1247 | 
1248 | x86_jump_mem :: Word32 -> CodeGen e s ()
1249 | x86_jump_mem mem = emit8 0xff >> x86_mem_emit 4 mem
1250 | 
1251 | x86_jump_membase :: Word8 -> Word32 -> CodeGen e s ()
1252 | x86_jump_membase basereg disp =
1253 |     emit8 0xff >> x86_membase_emit 4 basereg disp
1254 | 
1255 | x86_jump_pointer :: Ptr a -> CodeGen e s ()
1256 | x86_jump_pointer target =
1257 |     do inst <- getCodeOffset
1258 |        base <- getBasePtr
1259 |        let ptr = base `plusPtr` inst
1260 |        x86_jump32 (fromIntegral (target `minusPtr` ptr - 5))
1261 | 
1262 | -- target is a pointer in our buffer.
1263 | 
1264 | {-
1265 | x86_jump_code target =
1266 |     do inst <- getCodeOffset
1267 |        let t = target - inst - 2
1268 |        if x86_is_imm8 t
1269 |           then x86_jump8 (fromIntegral t)
1270 |           else x86_jump32 (fromIntegral (t - 3))
1271 | -}
1272 | {-
1273 | x86_jump_disp disp =
1274 |     do let t = disp - 2
1275 |        if x86_is_imm8 t
1276 |           then x86_jump8 (fromIntegral t)
1277 |           else x86_jump32 (t - 3)
1278 | -}
1279 | 
1280 | x86_branch8 :: Int -> Word8 -> Bool -> CodeGen e s ()
1281 | x86_branch8 cond imm is_signed =
1282 |     do if is_signed
1283 |           then emit8 (x86_cc_signed_map !! cond)
1284 |           else emit8 (x86_cc_unsigned_map !! cond)
1285 |        x86_imm_emit8 imm
1286 | 
1287 | x86_branch32 :: Int -> Word32 -> Bool -> CodeGen e s ()
1288 | x86_branch32 cond imm is_signed =
1289 |     do emit8 0x0f
1290 |        if is_signed
1291 |           then emit8 ((x86_cc_signed_map !! cond) + 0x10)
1292 |           else emit8 ((x86_cc_unsigned_map !! cond) + 0x10)
1293 |        x86_imm_emit32 imm
1294 | 
1295 | x86_branch :: Int -> Int -> Bool -> CodeGen e s ()
1296 | x86_branch cond target is_signed =
1297 |     do inst <- getCodeOffset
1298 |        let offset = target - inst - 2;
1299 |        if x86_is_imm8 offset
1300 |           then x86_branch8 cond (fromIntegral offset) is_signed
1301 |           else x86_branch32 cond (fromIntegral (offset - 4)) is_signed
1302 | 
1303 | x86_branch_pointer :: Int -> Ptr a -> Bool -> CodeGen e s ()
1304 | x86_branch_pointer cond target is_signed =
1305 |     do inst <- getCodeOffset
1306 |        base <- getBasePtr
1307 |        let ptr = base `plusPtr` inst
1308 |        x86_branch32 cond (fromIntegral (target `minusPtr` ptr - 5)) is_signed
1309 | 
1310 | {-
1311 | x86_branch_disp cond disp is_signed =
1312 |     do let offset = disp - 2
1313 |        if x86_is_imm8 offset
1314 |           then x86_branch8 cond (fromIntegral offset) is_signed
1315 |           else x86_branch32 cond (offset - 4) is_signed
1316 | -}
1317 | 
1318 | x86_jecxz :: Word8 -> CodeGen e s ()
1319 | x86_jecxz imm = emit8 0xe3 >> emit8 imm
1320 | 
1321 | x86_set_reg :: Int -> Word8 -> Bool -> CodeGen e s ()
1322 | x86_set_reg cond reg is_signed =
1323 |     do emit8 0x0f
1324 |        if is_signed
1325 |           then emit8 ((x86_cc_signed_map !! cond) + 0x20)
1326 |           else emit8 ((x86_cc_unsigned_map !! cond) + 0x20)
1327 |        x86_reg_emit 0 reg
1328 | 
1329 | x86_set_mem :: Int -> Word32 -> Bool -> CodeGen e s ()
1330 | x86_set_mem cond mem is_signed =
1331 |     do emit8 0x0f
1332 |        if is_signed
1333 |           then emit8 ((x86_cc_signed_map !! cond) + 0x20)
1334 |           else emit8 ((x86_cc_unsigned_map !! cond) + 0x20)
1335 |        x86_mem_emit 0 mem
1336 | x86_set_membase :: Int -> Word8 -> Word32 -> Bool -> CodeGen e s ()
1337 | x86_set_membase cond basereg disp is_signed =
1338 |     do emit8 0x0f
1339 |        if is_signed
1340 |           then emit8 ((x86_cc_signed_map !! cond) + 0x20)
1341 |           else emit8 ((x86_cc_unsigned_map !! cond) + 0x20)
1342 |        x86_membase_emit 0 basereg disp
1343 | 
1344 | -- Call instructions.
1345 | 
1346 | x86_call_imm :: Word32 -> CodeGen s e ()
1347 | x86_call_imm disp = emit8 0xe8 >> x86_imm_emit32 disp
1348 | 
1349 | x86_call_reg :: Word8 -> CodeGen s e ()
1350 | x86_call_reg reg = emit8 0xff >> x86_reg_emit 2 reg
1351 | 
1352 | x86_call_mem :: Word32 -> CodeGen s e ()
1353 | x86_call_mem mem = emit8 0xff >> x86_mem_emit 2 mem
1354 | 
1355 | x86_call_membase :: Word8 -> Word32 -> CodeGen s e ()
1356 | x86_call_membase basereg disp =
1357 |     emit8 0xff >> x86_membase_emit 2 basereg disp
1358 | 
1359 | x86_call_code :: Int -> CodeGen s e ()
1360 | x86_call_code target =
1361 |     do inst <- getCodeOffset
1362 |        let  _x86_offset = (target - inst - 5)
1363 |        x86_call_imm (fromIntegral _x86_offset)
1364 | 
1365 | x86_call_hs :: FunPtr a -> CodeGen e s ()
1366 | x86_call_hs fptr = do { offset <- getCodeOffset
1367 |                       ; base <- getBasePtr
1368 |                       ; emitRelocInfo (offset + 1)
1369 |                           RelocPCRel fptr
1370 |                       ; x86_call_imm $ (fromIntegral (minusPtr (castFunPtrToPtr fptr) (plusPtr base offset) - 5))
1371 |                       }
1372 | 
1373 | -- RET instruction.
1374 | 
1375 | x86_ret :: CodeGen s e ()
1376 | x86_ret = emit8 0xc3
1377 | 
1378 | x86_ret_imm :: Word16 -> CodeGen s e ()
1379 | x86_ret_imm imm =
1380 |     if imm == 0 then x86_ret else emit8 0xc2 >> x86_imm_emit16 imm
1381 | 
1382 | -- Conditional move instructions.
1383 | x86_cmov ::Int -> Bool -> CodeGen e s ()
1384 | x86_cmov cond is_signed =
1385 |     do emit8 0x0f
1386 |        if is_signed
1387 |           then emit8 ((x86_cc_signed_map !! cond) - 0x30)
1388 |           else emit8 ((x86_cc_unsigned_map !! cond) - 0x30)
1389 | x86_cmov_reg :: Int -> Bool -> Word8 -> Word8 -> CodeGen e s ()
1390 | x86_cmov_reg cond is_signed dreg reg =
1391 |     do x86_cmov cond is_signed
1392 |        x86_reg_emit dreg reg
1393 | x86_cmov_mem :: Int -> Bool -> Word8 -> Word32 -> CodeGen e s ()
1394 | x86_cmov_mem cond is_signed reg mem =
1395 |     do x86_cmov cond is_signed
1396 |        x86_mem_emit reg mem
1397 | x86_cmov_membase :: Int -> Bool -> Word8 -> Word8 -> Word32 -> CodeGen e s ()
1398 | x86_cmov_membase cond is_signed reg basereg disp =
1399 |     do x86_cmov cond is_signed
1400 |        x86_membase_emit reg basereg disp
1401 | 
1402 | -- Note: definition for ENTER instruction is not complete.  The counter
1403 | -- for the display setup is set to 0.
1404 | 
1405 | x86_enter :: Word16 -> CodeGen s e ()
1406 | x86_enter framesize = emit8 0xc8 >> x86_imm_emit16 framesize >> emit8 0
1407 | 
1408 | x86_leave :: CodeGen s e ()
1409 | x86_leave = emit8 0xc9
1410 | 
1411 | x86_sahf :: CodeGen s e ()
1412 | x86_sahf  = emit8 0x9e
1413 | 
1414 | -- Trigonometric floating point functions
1415 | 
1416 | x86_fsin, x86_fcos, x86_fabs, x86_ftst, x86_fxam, x86_fpatan,
1417 |  x86_fprem, x86_fprem1, x86_frndint, x86_fsqrt, x86_fptan :: CodeGen s e ()
1418 | x86_fsin    = emit8 0xd9 >> emit8 0xfe
1419 | x86_fcos    = emit8 0xd9 >> emit8 0xff
1420 | x86_fabs    = emit8 0xd9 >> emit8 0xe1
1421 | x86_ftst    = emit8 0xd9 >> emit8 0xe4
1422 | x86_fxam    = emit8 0xd9 >> emit8 0xe5
1423 | x86_fpatan  = emit8 0xd9 >> emit8 0xf3
1424 | x86_fprem   = emit8 0xd9 >> emit8 0xf8
1425 | x86_fprem1  = emit8 0xd9 >> emit8 0xf5
1426 | x86_frndint = emit8 0xd9 >> emit8 0xfc
1427 | x86_fsqrt   = emit8 0xd9 >> emit8 0xfa
1428 | x86_fptan   = emit8 0xd9 >> emit8 0xf2
1429 | 
1430 | -- Fast instruction sequences for 1 to 7-byte noops.
1431 | 
1432 | x86_padding :: (Eq t, Num t) => t -> CodeGen e s ()
1433 | x86_padding size =
1434 |     case size of
1435 |       1 -> x86_nop
1436 |       2 -> emit8 0x8b >> emit8  0xc0
1437 |       3 -> emit8 0x8d >> emit8 0x6d >> emit8 0x00
1438 |       4 -> emit8 0x8d >> emit8 0x64 >> emit8 0x24 >> emit8 0x00
1439 |       5 -> emit8 0x8d >> emit8 0x64 >> emit8 0x24 >> emit8 0x00 >>
1440 |            x86_nop
1441 |       6 -> emit8 0x8d >> emit8 0xad >>
1442 |            emit8 0x00 >> emit8 0x00 >>
1443 |            emit8 0x00 >> emit8 0x00
1444 |       7 -> emit8 0x8d >> emit8 0xa4 >>
1445 |            emit8 0x24 >> emit8 0x00 >>
1446 |            emit8 0x00 >> emit8 0x00 >>
1447 |            emit8 0x00
1448 |       _ -> failCodeGen (PP.text "invalid padding size")
1449 | 
1450 | -- Generate the code for a function prologue.  The frame_size is the
1451 | -- number of bytes to be allocated as the frame size, and the reg_mask
1452 | -- specifies which registers to save on function entry.
1453 | 
1454 | x86_prolog :: Int -> Int -> CodeGen e s ()
1455 | x86_prolog frame_size reg_mask =
1456 |     do x86_push_reg x86_ebp
1457 |        x86_mov_reg_reg x86_ebp x86_esp x86_dword_size
1458 |        gen_push 0 1
1459 |        if frame_size /= 0
1460 |           then x86_alu_reg_imm x86_sub x86_esp frame_size
1461 |           else return ()
1462 |   where
1463 |   gen_push i m =
1464 |      if i <= x86_edi
1465 |         then do if (reg_mask .&. m) /= 0
1466 |                    then x86_push_reg i
1467 |                    else return ()
1468 |                 gen_push (i + 1) (m `shiftL` 1)
1469 |         else return ()
1470 | 
1471 | -- Opposite to x86_prolog: destroys the stack frame and restores the
1472 | -- registers in reg_mask, which should be the same as the register mask
1473 | -- used on function entry.
1474 | 
1475 | x86_epilog :: Int -> CodeGen e s ()
1476 | x86_epilog reg_mask =
1477 |     do gen_pop x86_edi (1 `shiftL` (fromIntegral x86_edi))
1478 |        x86_mov_reg_reg x86_esp x86_ebp x86_dword_size
1479 |        x86_pop_reg x86_ebp
1480 |        x86_ret
1481 |   where
1482 |   gen_pop i m =
1483 |     if m /= 0
1484 |        then do if (reg_mask .&. m) /= 0
1485 |                   then x86_pop_reg i
1486 |                   else return ()
1487 |                gen_pop (i - 1) (m `shiftR` 1)
1488 |        else return ()
1489 | 
1490 | -- TODO: Move signatures to definition, delete duplicates.
1491 | x86_xchg_reg_reg ::
1492 |   (Eq a, Num a) =>
1493 |   Word8
1494 |   -> Word8
1495 |   -> a
1496 |   -> CodeGen e s ()
1497 | x86_xchg_mem_reg ::
1498 |   (Eq a, Num a) =>
1499 |   Word32
1500 |   -> Word8
1501 |   -> a
1502 |   -> CodeGen e s ()
1503 | x86_xchg_membase_reg ::
1504 |   (Eq a, Num a) =>
1505 |   Word8
1506 |   -> Word32
1507 |   -> Word8
1508 |   -> a
1509 |   -> CodeGen e s ()
1510 | x86_xadd_reg_reg ::
1511 |   (Eq a, Num a) =>
1512 |   Word8
1513 |   -> Word8
1514 |   -> a
1515 |   -> CodeGen e s ()
1516 | x86_xadd_mem_reg ::
1517 |   (Eq a, Num a) =>
1518 |   Word32
1519 |   -> Word8
1520 |   -> a
1521 |   -> CodeGen e s ()
1522 | x86_xadd_membase_reg ::
1523 |   (Eq a, Num a) =>
1524 |   Word8
1525 |   -> Word32
1526 |   -> Word8
1527 |   -> a
1528 |   -> CodeGen e s ()
1529 | x86_inc_mem ::
1530 |   Word32 -> CodeGen e s ()
1531 | x86_inc_membase ::
1532 |   Word8
1533 |   -> Word32
1534 |   -> CodeGen e s ()
1535 | x86_inc_reg ::
1536 |   Word8 -> CodeGen e s ()
1537 | x86_dec_mem ::
1538 |   Word32 -> CodeGen e s ()
1539 | x86_dec_membase ::
1540 |   Word8
1541 |   -> Word32
1542 |   -> CodeGen e s ()
1543 | x86_dec_reg ::
1544 |   Word8 -> CodeGen e s ()
1545 | x86_not_mem ::
1546 |   Word32 -> CodeGen e s ()
1547 | x86_not_membase ::
1548 |   Word8
1549 |   -> Word32
1550 |   -> CodeGen e s ()
1551 | x86_not_reg ::
1552 |   Word8 -> CodeGen e s ()
1553 | x86_neg_mem ::
1554 |   Word32 -> CodeGen e s ()
1555 | x86_neg_membase ::
1556 |   Word8
1557 |   -> Word32
1558 |   -> CodeGen e s ()
1559 | x86_neg_reg ::
1560 |   Word8 -> CodeGen e s ()
1561 | x86_alu_mem_imm ::
1562 |   Word8
1563 |   -> Word32
1564 |   -> Word32
1565 |   -> CodeGen e s ()
1566 | x86_alu_membase_imm ::
1567 |   Word8
1568 |   -> Word8
1569 |   -> Word32
1570 |   -> Word32
1571 |   -> CodeGen e s ()
1572 | x86_alu_membase8_imm ::
1573 |   Word8
1574 |   -> Word8
1575 |   -> Word32
1576 |   -> Word8
1577 |   -> CodeGen e s ()
1578 | x86_alu_mem_reg ::
1579 |   Word8
1580 |   -> Word32
1581 |   -> Word8
1582 |   -> CodeGen e s ()
1583 | x86_alu_membase_reg ::
1584 |   Word8
1585 |   -> Word8
1586 |   -> Word32
1587 |   -> Word8
1588 |   -> CodeGen e s ()
1589 | x86_alu_reg_reg ::
1590 |   Word8
1591 |   -> Word8
1592 |   -> Word8
1593 |   -> CodeGen e s ()
1594 | x86_alu_reg8_reg8 ::
1595 |   Word8
1596 |   -> Word8
1597 |   -> Word8
1598 |   -> Bool
1599 |   -> Bool
1600 |   -> CodeGen e s ()
1601 | x86_alu_reg_mem ::
1602 |   Word8
1603 |   -> Word8
1604 |   -> Word32
1605 |   -> CodeGen e s ()
1606 | x86_alu_reg_membase ::
1607 |   Word8
1608 |   -> Word8
1609 |   -> Word8
1610 |   -> Word32
1611 |   -> CodeGen e s ()
1612 | x86_test_reg_imm ::
1613 |   Word8
1614 |   -> Word32
1615 |   -> CodeGen e s ()
1616 | x86_test_mem_imm ::
1617 |   Word32
1618 |   -> Word32
1619 |   -> CodeGen e s ()
1620 | x86_test_membase_imm ::
1621 |   Word8
1622 |   -> Word32
1623 |   -> Word32
1624 |   -> CodeGen e s ()
1625 | x86_test_reg_reg ::
1626 |   Word8
1627 |   -> Word8
1628 |   -> CodeGen e s ()
1629 | x86_test_mem_reg ::
1630 |   Word32
1631 |   -> Word8
1632 |   -> CodeGen e s ()
1633 | x86_test_membase_reg ::
1634 |   Word8
1635 |   -> Word32
1636 |   -> Word8
1637 |   -> CodeGen e s ()
1638 | x86_shift_reg_imm ::
1639 |   Word8
1640 |   -> Word8
1641 |   -> Word8
1642 |   -> CodeGen e s ()
1643 | x86_shift_mem_imm ::
1644 |   Word8
1645 |   -> Word32
1646 |   -> Word8
1647 |   -> CodeGen e s ()
1648 | x86_shift_membase_imm ::
1649 |   Word8
1650 |   -> Word8
1651 |   -> Word32
1652 |   -> Word8
1653 |   -> CodeGen e s ()
1654 | x86_shift_reg ::
1655 |   Word8
1656 |   -> Word8
1657 |   -> CodeGen e s ()
1658 | x86_shift_mem ::
1659 |   Word8
1660 |   -> Word32
1661 |   -> CodeGen e s ()
1662 | x86_shift_membase ::
1663 |   Word8
1664 |   -> Word8
1665 |   -> Word32
1666 |   -> CodeGen e s ()
1667 | x86_shrd_reg ::
1668 |   Word8
1669 |   -> Word8
1670 |   -> CodeGen e s ()
1671 | x86_shrd_reg_imm ::
1672 |   Word8
1673 |   -> Word8
1674 |   -> Word8
1675 |   -> CodeGen e s ()
1676 | x86_shld_reg ::
1677 |   Word8
1678 |   -> Word8
1679 |   -> CodeGen e s ()
1680 | x86_shld_reg_imm ::
1681 |   Word8
1682 |   -> Word8
1683 |   -> Word8
1684 |   -> CodeGen e s ()
1685 | 
1686 | -- =============================================================================
1687 | -- SSE instructions.
1688 | -- =============================================================================
1689 | 
1690 | data X86_SSE_PFX = X86_SSE_SD
1691 |                  | X86_SSE_SS
1692 |                  | X86_SSE_PD
1693 |                  | X86_SSE_PS
1694 | --newtype X86_SSE_PFX = X86_SSE_PFX (forall e s. CodeGen e s ())
1695 | 
1696 | x86_sse_sd, x86_sse_ss, x86_sse_pd, x86_sse_ps :: X86_SSE_PFX
1697 | x86_sse_sd = X86_SSE_SD
1698 | x86_sse_ss = X86_SSE_SS
1699 | x86_sse_pd = X86_SSE_PD
1700 | x86_sse_ps = X86_SSE_PS
1701 | 
1702 | emit_sse :: X86_SSE_PFX -> CodeGen e s ()
1703 | emit_sse X86_SSE_SD = emit8 0xf2
1704 | emit_sse X86_SSE_SS = emit8 0xf3
1705 | emit_sse X86_SSE_PD = emit8 0x66
1706 | emit_sse X86_SSE_PS = return ()
1707 | 
1708 | x86_sqrt_sse_reg_reg :: X86_SSE_PFX -> Word8 -> Word8 -> CodeGen e s ()
1709 | x86_sqrt_sse_reg_reg pfx dreg reg =
1710 |     do emit_sse pfx
1711 |        emit8 0x0f
1712 |        emit8 0x51
1713 |        x86_reg_emit dreg reg
1714 | 
1715 | x86_sqrt_sse_reg_mem :: X86_SSE_PFX -> Word8 -> Word32 -> CodeGen e s ()
1716 | x86_sqrt_sse_reg_mem pfx dreg mem =
1717 |     do emit_sse pfx
1718 |        emit8 0x0f
1719 |        emit8 0x51
1720 |        x86_mem_emit dreg mem
1721 | 
1722 | x86_sqrt_sse_reg_membase :: X86_SSE_PFX -> Word8 -> Word8 -> Word32 -> CodeGen e s ()
1723 | x86_sqrt_sse_reg_membase pfx dreg basereg disp =
1724 |     do emit_sse pfx
1725 |        emit8 0x0f
1726 |        emit8 0x51
1727 |        x86_membase_emit dreg basereg disp
1728 | 
1729 | x86_add_sse_reg_reg :: X86_SSE_PFX -> Word8 -> Word8 -> CodeGen e s ()
1730 | x86_add_sse_reg_reg pfx dreg reg =
1731 |     do emit_sse pfx
1732 |        emit8 0x0f
1733 |        emit8 0x58
1734 |        x86_reg_emit dreg reg
1735 | 
1736 | x86_add_sse_reg_mem :: X86_SSE_PFX -> Word8 -> Word32 -> CodeGen e s ()
1737 | x86_add_sse_reg_mem pfx dreg mem =
1738 |     do emit_sse pfx
1739 |        emit8 0x0f
1740 |        emit8 0x58
1741 |        x86_mem_emit dreg mem
1742 | 
1743 | x86_add_sse_reg_membase :: X86_SSE_PFX -> Word8 -> Word8 -> Word32 -> CodeGen e s ()
1744 | x86_add_sse_reg_membase pfx dreg basereg disp =
1745 |     do emit_sse pfx
1746 |        emit8 0x0f
1747 |        emit8 0x58
1748 |        x86_membase_emit dreg basereg disp
1749 | 
1750 | x86_mul_sse_reg_reg :: X86_SSE_PFX -> Word8 -> Word8 -> CodeGen e s ()
1751 | x86_mul_sse_reg_reg pfx dreg reg =
1752 |     do emit_sse pfx
1753 |        emit8 0x0f
1754 |        emit8 0x59
1755 |        x86_reg_emit dreg reg
1756 | 
1757 | x86_mul_sse_reg_mem :: X86_SSE_PFX -> Word8 -> Word32 -> CodeGen e s ()
1758 | x86_mul_sse_reg_mem pfx dreg mem =
1759 |     do emit_sse pfx
1760 |        emit8 0x0f
1761 |        emit8 0x59
1762 |        x86_mem_emit dreg mem
1763 | 
1764 | x86_mul_sse_reg_membase :: X86_SSE_PFX -> Word8 -> Word8 -> Word32 -> CodeGen e s ()
1765 | x86_mul_sse_reg_membase pfx dreg basereg disp =
1766 |     do emit_sse pfx
1767 |        emit8 0x0f
1768 |        emit8 0x59
1769 |        x86_membase_emit dreg basereg disp
1770 | 
1771 | x86_sub_sse_reg_reg :: X86_SSE_PFX -> Word8 -> Word8 -> CodeGen e s ()
1772 | x86_sub_sse_reg_reg pfx dreg reg =
1773 |     do emit_sse pfx
1774 |        emit8 0x0f
1775 |        emit8 0x5c
1776 |        x86_reg_emit dreg reg
1777 | 
1778 | x86_sub_sse_reg_mem :: X86_SSE_PFX -> Word8 -> Word32 -> CodeGen e s ()
1779 | x86_sub_sse_reg_mem pfx dreg mem =
1780 |     do emit_sse pfx
1781 |        emit8 0x0f
1782 |        emit8 0x5c
1783 |        x86_mem_emit dreg mem
1784 | 
1785 | x86_sub_sse_reg_membase :: X86_SSE_PFX -> Word8 -> Word8 -> Word32 -> CodeGen e s ()
1786 | x86_sub_sse_reg_membase pfx dreg basereg disp =
1787 |     do emit_sse pfx
1788 |        emit8 0x0f
1789 |        emit8 0x5c
1790 |        x86_membase_emit dreg basereg disp
1791 | 
1792 | x86_min_sse_reg_reg :: X86_SSE_PFX -> Word8 -> Word8 -> CodeGen e s ()
1793 | x86_min_sse_reg_reg pfx dreg reg =
1794 |     do emit_sse pfx
1795 |        emit8 0x0f
1796 |        emit8 0x5d
1797 |        x86_reg_emit dreg reg
1798 | 
1799 | x86_min_sse_reg_mem :: X86_SSE_PFX -> Word8 -> Word32 -> CodeGen e s ()
1800 | x86_min_sse_reg_mem pfx dreg mem =
1801 |     do emit_sse pfx
1802 |        emit8 0x0f
1803 |        emit8 0x5d
1804 |        x86_mem_emit dreg mem
1805 | 
1806 | x86_min_sse_reg_membase :: X86_SSE_PFX -> Word8 -> Word8 -> Word32 -> CodeGen e s ()
1807 | x86_min_sse_reg_membase pfx dreg basereg disp =
1808 |     do emit_sse pfx
1809 |        emit8 0x0f
1810 |        emit8 0x5d
1811 |        x86_membase_emit dreg basereg disp
1812 | 
1813 | x86_div_sse_reg_reg :: X86_SSE_PFX -> Word8 -> Word8 -> CodeGen e s ()
1814 | x86_div_sse_reg_reg pfx dreg reg =
1815 |     do emit_sse pfx
1816 |        emit8 0x0f
1817 |        emit8 0x5e
1818 |        x86_reg_emit dreg reg
1819 | 
1820 | x86_div_sse_reg_mem :: X86_SSE_PFX -> Word8 -> Word32 -> CodeGen e s ()
1821 | x86_div_sse_reg_mem pfx dreg mem =
1822 |     do emit_sse pfx
1823 |        emit8 0x0f
1824 |        emit8 0x5e
1825 |        x86_mem_emit dreg mem
1826 | 
1827 | x86_div_sse_reg_membase :: X86_SSE_PFX -> Word8 -> Word8 -> Word32 -> CodeGen e s ()
1828 | x86_div_sse_reg_membase pfx dreg basereg disp =
1829 |     do emit_sse pfx
1830 |        emit8 0x0f
1831 |        emit8 0x5e
1832 |        x86_membase_emit dreg basereg disp
1833 | 
1834 | x86_max_sse_reg_reg :: X86_SSE_PFX -> Word8 -> Word8 -> CodeGen e s ()
1835 | x86_max_sse_reg_reg pfx dreg reg =
1836 |     do emit_sse pfx
1837 |        emit8 0x0f
1838 |        emit8 0x5f
1839 |        x86_reg_emit dreg reg
1840 | 
1841 | x86_max_sse_reg_mem :: X86_SSE_PFX -> Word8 -> Word32 -> CodeGen e s ()
1842 | x86_max_sse_reg_mem pfx dreg mem =
1843 |     do emit_sse pfx
1844 |        emit8 0x0f
1845 |        emit8 0x5f
1846 |        x86_mem_emit dreg mem
1847 | 
1848 | x86_max_sse_reg_membase :: X86_SSE_PFX -> Word8 -> Word8 -> Word32 -> CodeGen e s ()
1849 | x86_max_sse_reg_membase pfx dreg basereg disp =
1850 |     do emit_sse pfx
1851 |        emit8 0x0f
1852 |        emit8 0x5f
1853 |        x86_membase_emit dreg basereg disp
1854 | 
1855 | x86_mov_sse_reg_reg :: X86_SSE_PFX -> Word8 -> Word8 -> CodeGen e s ()
1856 | x86_mov_sse_reg_reg pfx dreg reg =
1857 |     do emit_sse pfx
1858 |        emit8 0x0f
1859 |        emit8 0x10
1860 |        x86_reg_emit dreg reg
1861 | 
1862 | x86_mov_sse_reg_mem :: X86_SSE_PFX -> Word8 -> Word32 -> CodeGen e s ()
1863 | x86_mov_sse_reg_mem pfx dreg mem =
1864 |     do emit_sse pfx
1865 |        emit8 0x0f
1866 |        emit8 0x10
1867 |        x86_mem_emit dreg mem
1868 | 
1869 | x86_mov_sse_reg_membase :: X86_SSE_PFX -> Word8 -> Word8 -> Word32 -> CodeGen e s ()
1870 | x86_mov_sse_reg_membase pfx dreg basereg disp =
1871 |     do emit_sse pfx
1872 |        emit8 0x0f
1873 |        emit8 0x10
1874 |        x86_membase_emit dreg basereg disp
1875 | 
1876 | x86_mov_sse_mem_reg :: X86_SSE_PFX -> Word32 -> Word8 -> CodeGen e s ()
1877 | x86_mov_sse_mem_reg pfx mem reg =
1878 |     do emit_sse pfx
1879 |        emit8 0x0f
1880 |        emit8 0x11
1881 |        x86_mem_emit reg mem
1882 | 
1883 | x86_mov_sse_membase_reg :: X86_SSE_PFX -> Word8 -> Word32 -> Word8 -> CodeGen e s ()
1884 | x86_mov_sse_membase_reg pfx basereg disp reg =
1885 |     do emit_sse pfx
1886 |        emit8 0x0f
1887 |        emit8 0x11
1888 |        x86_membase_emit reg basereg disp
1889 | 
1890 | x86_ucomisd_reg_reg :: Word8 -> Word8 -> CodeGen e s ()
1891 | x86_ucomisd_reg_reg dreg reg =
1892 |     do emit8 0x66
1893 |        emit8 0x0f
1894 |        emit8 0x2e
1895 |        x86_reg_emit dreg reg
1896 | 
1897 | x86_ucomisd_reg_mem :: Word8 -> Word32 -> CodeGen e s ()
1898 | x86_ucomisd_reg_mem dreg mem =
1899 |     do emit8 0x66
1900 |        emit8 0x0f
1901 |        emit8 0x2e
1902 |        x86_mem_emit dreg mem
1903 | 
1904 | x86_ucomisd_reg_membase :: Word8 -> Word8 -> Word32 -> CodeGen e s ()
1905 | x86_ucomisd_reg_membase dreg basereg disp =
1906 |     do emit8 0x66
1907 |        emit8 0x0f
1908 |        emit8 0x2e
1909 |        x86_membase_emit dreg basereg disp
1910 | 
1911 | x86_ucomiss_reg_reg :: Word8 -> Word8 -> CodeGen e s ()
1912 | x86_ucomiss_reg_reg dreg reg =
1913 |     do emit8 0x0f
1914 |        emit8 0x2e
1915 |        x86_reg_emit dreg reg
1916 | 
1917 | x86_ucomiss_reg_mem :: Word8 -> Word32 -> CodeGen e s ()
1918 | x86_ucomiss_reg_mem dreg mem =
1919 |     do emit8 0x0f
1920 |        emit8 0x2e
1921 |        x86_mem_emit dreg mem
1922 | 
1923 | x86_ucomiss_reg_membase :: Word8 -> Word8 -> Word32 -> CodeGen e s ()
1924 | x86_ucomiss_reg_membase dreg basereg disp =
1925 |     do emit8 0x0f
1926 |        emit8 0x2e
1927 |        x86_membase_emit dreg basereg disp
1928 | 
1929 | x86_comisd_reg_reg :: Word8 -> Word8 -> CodeGen e s ()
1930 | x86_comisd_reg_reg dreg reg =
1931 |     do emit8 0x66
1932 |        emit8 0x0f
1933 |        emit8 0x2f
1934 |        x86_reg_emit dreg reg
1935 | 
1936 | x86_comisd_reg_mem :: Word8 -> Word32 -> CodeGen e s ()
1937 | x86_comisd_reg_mem dreg mem =
1938 |     do emit8 0x66
1939 |        emit8 0x0f
1940 |        emit8 0x2f
1941 |        x86_mem_emit dreg mem
1942 | 
1943 | x86_comisd_reg_membase :: Word8 -> Word8 -> Word32 -> CodeGen e s ()
1944 | x86_comisd_reg_membase dreg basereg disp =
1945 |     do emit8 0x66
1946 |        emit8 0x0f
1947 |        emit8 0x2e
1948 |        x86_membase_emit dreg basereg disp
1949 | 
1950 | x86_comiss_reg_reg :: Word8 -> Word8 -> CodeGen e s ()
1951 | x86_comiss_reg_reg dreg reg =
1952 |     do emit8 0x0f
1953 |        emit8 0x2f
1954 |        x86_reg_emit dreg reg
1955 | 
1956 | x86_comiss_reg_mem :: Word8 -> Word32 -> CodeGen e s ()
1957 | x86_comiss_reg_mem dreg mem =
1958 |     do emit8 0x0f
1959 |        emit8 0x2f
1960 |        x86_mem_emit dreg mem
1961 | 
1962 | x86_comiss_reg_membase :: Word8 -> Word8 -> Word32 -> CodeGen e s ()
1963 | x86_comiss_reg_membase dreg basereg disp =
1964 |     do emit8 0x0f
1965 |        emit8 0x2e
1966 |        x86_membase_emit dreg basereg disp
1967 | 
1968 | 
1969 | newtype XMMReg = XMMReg Word8
1970 |     deriving (Eq, Ord)
1971 | 
1972 | newtype Mem = Mem Word32
1973 | 
1974 | data MemBase = MemBase Word8 Word32
1975 | 
1976 | 
1977 | class XMMLocation xmm where
1978 |    xmm_location_emit :: Word8 -> xmm -> CodeGen e s ()
1979 | 
1980 | instance XMMLocation XMMReg where
1981 |    xmm_location_emit dreg (XMMReg reg) =
1982 |       x86_reg_emit dreg reg
1983 | 
1984 | instance XMMLocation Mem where
1985 |    xmm_location_emit dreg (Mem mem) =
1986 |       x86_mem_emit dreg mem
1987 | 
1988 | instance XMMLocation MemBase where
1989 |    xmm_location_emit dreg (MemBase basereg disp) =
1990 |       x86_membase_emit dreg basereg disp
1991 | 
1992 | 
1993 | x86_movss_to_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
1994 | x86_movss_to_reg dreg reg =
1995 |     do emit8 0xf3
1996 |        emit8 0x0f
1997 |        emit8 0x10
1998 |        xmm_location_emit dreg reg
1999 | 
2000 | x86_movss_from_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2001 | x86_movss_from_reg dreg reg =
2002 |     do emit8 0xf3
2003 |        emit8 0x0f
2004 |        emit8 0x11
2005 |        xmm_location_emit dreg reg
2006 | 
2007 | x86_movsd_to_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2008 | x86_movsd_to_reg dreg reg =
2009 |     do emit8 0xf2
2010 |        emit8 0x0f
2011 |        emit8 0x10
2012 |        xmm_location_emit dreg reg
2013 | 
2014 | x86_movsd_from_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2015 | x86_movsd_from_reg dreg reg =
2016 |     do emit8 0xf2
2017 |        emit8 0x0f
2018 |        emit8 0x11
2019 |        xmm_location_emit dreg reg
2020 | 
2021 | 
2022 | -- | xmm must not be a register
2023 | x86_movlps_to_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2024 | x86_movlps_to_reg dreg reg =
2025 |     do emit8 0x0f
2026 |        emit8 0x12
2027 |        xmm_location_emit dreg reg
2028 | 
2029 | -- | xmm must not be a register
2030 | x86_movlps_from_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2031 | x86_movlps_from_reg dreg reg =
2032 |     do emit8 0x0f
2033 |        emit8 0x13
2034 |        xmm_location_emit dreg reg
2035 | 
2036 | -- | xmm must not be a register
2037 | x86_movlpd_to_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2038 | x86_movlpd_to_reg dreg reg =
2039 |     do emit8 0x66
2040 |        emit8 0x0f
2041 |        emit8 0x12
2042 |        xmm_location_emit dreg reg
2043 | 
2044 | -- | xmm must not be a register
2045 | x86_movlpd_from_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2046 | x86_movlpd_from_reg dreg reg =
2047 |     do emit8 0x66
2048 |        emit8 0x0f
2049 |        emit8 0x13
2050 |        xmm_location_emit dreg reg
2051 | 
2052 | 
2053 | x86_movups_to_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2054 | x86_movups_to_reg dreg reg =
2055 |     do emit8 0x0f
2056 |        emit8 0x10
2057 |        xmm_location_emit dreg reg
2058 | 
2059 | x86_movups_from_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2060 | x86_movups_from_reg dreg reg =
2061 |     do emit8 0x0f
2062 |        emit8 0x11
2063 |        xmm_location_emit dreg reg
2064 | 
2065 | x86_movupd_to_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2066 | x86_movupd_to_reg dreg reg =
2067 |     do emit8 0x66
2068 |        emit8 0x0f
2069 |        emit8 0x10
2070 |        xmm_location_emit dreg reg
2071 | 
2072 | x86_movupd_from_reg :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2073 | x86_movupd_from_reg dreg reg =
2074 |     do emit8 0x66
2075 |        emit8 0x0f
2076 |        emit8 0x11
2077 |        xmm_location_emit dreg reg
2078 | 
2079 | 
2080 | x86_haddps :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2081 | x86_haddps dreg reg =
2082 |     do emit8 0xf2
2083 |        emit8 0x0f
2084 |        emit8 0x7c
2085 |        xmm_location_emit dreg reg
2086 | 
2087 | x86_haddpd :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2088 | x86_haddpd dreg reg =
2089 |     do emit8 0x66
2090 |        emit8 0x0f
2091 |        emit8 0x7c
2092 |        xmm_location_emit dreg reg
2093 | 
2094 | 
2095 | x86_shufps :: XMMLocation xmm => Word8 -> xmm -> Word8 -> CodeGen e s ()
2096 | x86_shufps dreg reg src =
2097 |     do emit8 0x0f
2098 |        emit8 0xc6
2099 |        xmm_location_emit dreg reg
2100 |        emit8 src
2101 | 
2102 | x86_shufpd :: XMMLocation xmm => Word8 -> xmm -> Word8 -> CodeGen e s ()
2103 | x86_shufpd dreg reg src =
2104 |     do emit8 0x66
2105 |        emit8 0x0f
2106 |        emit8 0xc6
2107 |        xmm_location_emit dreg reg
2108 |        emit8 src
2109 | 
2110 | 
2111 | x86_cvtdq2ps :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2112 | x86_cvtdq2ps dreg reg =
2113 |     do emit8 0x0f
2114 |        emit8 0x5b
2115 |        xmm_location_emit dreg reg
2116 | 
2117 | x86_cvttps2dq :: XMMLocation xmm => Word8 -> xmm -> CodeGen e s ()
2118 | x86_cvttps2dq dreg reg =
2119 |     do emit8 0xf3
2120 |        emit8 0x0f
2121 |        emit8 0x5b
2122 |        xmm_location_emit dreg reg
2123 | 
2124 | 
2125 | 
2126 | -- =============================================================================
2127 | -- Prefetching instructions.
2128 | -- =============================================================================
2129 | 
2130 | x86_prefetch0_mem :: Word32 -> CodeGen e s ()
2131 | x86_prefetch0_mem m = x86_prefetch_mem 1 m
2132 | 
2133 | x86_prefetch1_mem :: Word32 -> CodeGen e s ()
2134 | x86_prefetch1_mem m = x86_prefetch_mem 2 m
2135 | 
2136 | x86_prefetch2_mem :: Word32 -> CodeGen e s ()
2137 | x86_prefetch2_mem m = x86_prefetch_mem 3 m
2138 | 
2139 | x86_prefetchnta_mem :: Word32 -> CodeGen e s ()
2140 | x86_prefetchnta_mem m = x86_prefetch_mem 0 m
2141 | 
2142 | x86_prefetch_mem :: Word8 -> Word32 -> CodeGen e s ()
2143 | x86_prefetch_mem hint disp =
2144 |     do emit8 0x0f
2145 |        emit8 0x18
2146 |        x86_address_byte 0 hint 0
2147 |        x86_imm_emit32 disp
2148 | 
2149 | x86_prefetch0_membase :: Word8 -> Word32 -> CodeGen e s ()
2150 | x86_prefetch0_membase r m = x86_prefetch_membase 1 r m
2151 | 
2152 | x86_prefetch1_membase :: Word8 -> Word32 -> CodeGen e s ()
2153 | x86_prefetch1_membase r m = x86_prefetch_membase 2 r m
2154 | 
2155 | x86_prefetch2_membase :: Word8 -> Word32 -> CodeGen e s ()
2156 | x86_prefetch2_membase r m = x86_prefetch_membase 3 r m
2157 | 
2158 | x86_prefetchnta_membase :: Word8 -> Word32 -> CodeGen e s ()
2159 | x86_prefetchnta_membase r m = x86_prefetch_membase 0 r m
2160 | 
2161 | x86_prefetch_membase :: Word8 -> Word8 -> Word32 -> CodeGen e s ()
2162 | x86_prefetch_membase hint reg disp =
2163 |     do emit8 0x0f
2164 |        emit8 0x18
2165 |        x86_membase_emit hint reg disp
2166 | 
2167 | x86_prefetch0_regp :: Word8 -> CodeGen e s ()
2168 | x86_prefetch0_regp r = x86_prefetch_regp 1 r
2169 | 
2170 | x86_prefetch1_regp :: Word8 -> CodeGen e s ()
2171 | x86_prefetch1_regp r = x86_prefetch_regp 2 r
2172 | 
2173 | x86_prefetch2_regp :: Word8 -> CodeGen e s ()
2174 | x86_prefetch2_regp r = x86_prefetch_regp 3 r
2175 | 
2176 | x86_prefetchnta_regp :: Word8 -> CodeGen e s ()
2177 | x86_prefetchnta_regp r = x86_prefetch_regp 0 r
2178 | 
2179 | x86_prefetch_regp :: Word8 -> Word8 -> CodeGen e s ()
2180 | x86_prefetch_regp hint reg =
2181 |     do emit8 0x0f
2182 |        emit8 0x18
2183 |        x86_regp_emit hint reg
2184 | 
2185 | 


--------------------------------------------------------------------------------
/Harpy/X86Disassembler.hs:
--------------------------------------------------------------------------------
 1 | --------------------------------------------------------------------------
 2 | -- |
 3 | -- Module      :  Harpy.X86Disassembler
 4 | -- Copyright   :  (c) Martin Grabmueller and Dirk Kleeblatt
 5 | -- License     :  BSD3
 6 | --
 7 | -- Maintainer  :  martin@grabmueller.de
 8 | -- Stability   :  provisional
 9 | -- Portability :  portable
10 | --
11 | -- Disassembler for x86 machine code.
12 | --
13 | -- This is a module for compatibility with earlier Harpy releases.  It
14 | -- re-exports the disassembler from the disassembler package.
15 | --------------------------------------------------------------------------
16 | 
17 | module Harpy.X86Disassembler(
18 |   -- * Types
19 |   Opcode,
20 |   Operand(..),
21 |   InstrOperandSize(..),
22 |   Instruction(..),
23 |   ShowStyle(..),
24 |   -- * Functions
25 |   disassembleBlock,
26 |   disassembleList,
27 |   disassembleArray,
28 |   showIntel,
29 |   showAtt
30 |   ) where
31 | 
32 | import Text.Disassembler.X86Disassembler
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Dirk Kleeblatt and Martin Grabmueller
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions
 7 | are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright
10 |    notice, this list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright
13 |    notice, this list of conditions and the following disclaimer in the
14 |    documentation and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the author nor the names of his contributors
17 |    may be used to endorse or promote products derived from this software
18 |    without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
21 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 | POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: all clean install reinstall uninstall doc
 2 | 
 3 | all:
 4 | 	runhaskell Setup.hs configure --prefix=$(HOME)/
 5 | 	runhaskell Setup.hs build
 6 | 
 7 | clean:
 8 | 	runhaskell Setup.hs clean
 9 | 
10 | 
11 | install:
12 | 	runhaskell Setup.hs install --user
13 | 
14 | reinstall:
15 | 	runhaskell Setup.hs clean
16 | 	runhaskell Setup.hs configure --prefix=$(HOME)/
17 | 	runhaskell Setup.hs build
18 | 	runhaskell Setup.hs install --user
19 | 
20 | uninstall:
21 | 	runhaskell Setup.hs unregister --user
22 | 
23 | doc:
24 | 	runhaskell Setup.hs haddock
25 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
  1 | 							-*-outline-*-
  2 | * Harpy NEWS
  3 | 
  4 | ** Updated for move to Github.
  5 | 
  6 | *** Re-licensed under BSD3
  7 | 
  8 | ** New in version 0.6.0.0
  9 | 
 10 | *** Re-licensed under BSD3
 11 | 
 12 | *** Updated to work with GHC 7.8.3
 13 | 
 14 | ** New in version 0.4.2
 15 | 
 16 | *** package structure
 17 | 
 18 | - The disassembler is now in a separate package, and is re-exported for
 19 |   compatibility by Harpy. This allows us to give the more liberal BSD licence
 20 |   to the disassembler.
 21 | 
 22 | *** New instructions
 23 | 
 24 | - The "mov", "jcc" (jump on condition code), and "cmp" instructions support
 25 |   "Ptr a" as operand
 26 | 
 27 | - Henning Thielemann provided some further floating point and SSE
 28 |   instructions.
 29 | 
 30 | - The loop instruction supports labels now.
 31 | 
 32 | *** Enhancements
 33 | 
 34 | - The disassembly of Harpy's internal code buffers includes all labels
 35 |   now, even when multiple labels are defined for the same location.
 36 | 
 37 | ** New in version 0.4.1
 38 | 
 39 | *** New Instances
 40 | 
 41 | - Many of Harpy's types are now instances of Eq.
 42 | 
 43 | *** New instructions
 44 | 
 45 | - Added support for the prefetching instructions PREFETCH0, PREFETCH1,
 46 |   PREFETCH2 and PREFETCHNTA.
 47 | 
 48 | *** Bug fixes
 49 | 
 50 | - Harpy.X86Disassembler.disassembleBloc was too strict and caused
 51 |   stack overflows for large inputs.  This was fixed.
 52 | 
 53 | - Disassembler: The instruction prefix list was not cleared when
 54 |   beginning to parse a new instruction.  This caused incorrect
 55 |   disassembly of SSE instructions.
 56 | 
 57 | - Disassembler: A bug has been fixed in the parsing routine for the
 58 |   addressing mode "scaled index + 32 bit offset" without base
 59 |   register.
 60 | 
 61 | ** New in version 0.4
 62 | 
 63 | - New convenience top-level module "Harpy", which re-exports
 64 |   Harpy.CodeGenMonad, Harpy.Call and Harpy.X86Assembler
 65 | 
 66 | - It is now possible to override Harpy's automatic code buffer
 67 |   management.  The new field 'customCodeBuffer' in the type
 68 |   'CodeGenConfig' can be set to 'Just (buf, size)', where 'buf' is a
 69 |   pointer to a memory region of 'size' bytes.  Harpy will then use the
 70 |   supplied code buffer and will not perform any automatic code buffer
 71 |   allocation on overflow.  Overflow checking is still performed and
 72 |   will result in an exception in the CodeGen monad.
 73 | 
 74 | - When using the high-level assembler in X86Assembler, the code buffer
 75 |   is automatically protected from overflow.
 76 | 
 77 | - Floating point operations added to X86Assembler (only for double
 78 |   operands yet).
 79 | 
 80 | - Preliminary support for SSE instructions.  Currently, only the
 81 |   packed and scalar floating-point arithmetic operations are supported
 82 |   (both in the low-level module Harpy.X86CodeGen and as methods in
 83 |   Harpy.X86Assembler)
 84 | 
 85 | - Code buffer default size has been increased from 128 to 4096 bytes.
 86 | 
 87 | - The CodeGenMonad fails when a label is defined twice.
 88 | 
 89 | - It is now possible to associate names with labels, using the new
 90 |   operation newNamedLabel.  The given names will show up in the
 91 |   disassembly, which makes debugging of generated code much easier.
 92 | 
 93 | - The doc directory contains a second, slightly larger tutorial now.
 94 | 
 95 | - The examples/evaluator directory contains a small example
 96 |   interpreter for arithmetic expressions, which translates expressions
 97 |   entered at the keayboard to machine code on the fly.  This is the
 98 |   demo program we presented at the Haskell Workshop 2007.
 99 | 
100 | ** New in version 0.2
101 | 
102 | - Everything is new!  This is the first released version.
103 | 
104 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # harpy - Runtime code generation for x86 machine code
 2 | 
 3 |     Codename: Harpy - Haskell Assembler at Run-time produces Y...
 4 |       Harpy [myth.]	f: die Harpyie
 5 |       http://en.wikipedia.org/wiki/Harpy
 6 | 
 7 | ## Introduction
 8 | 
 9 | Harpy is a library for run-time code generation in Haskell programs.
10 | 
11 | Harpy requires several Haskell extensions and GHC-specific features
12 | (the Haskell FFI, Template Haskell, multi-parameter type classes and
13 | monad transformers).
14 | 
15 | ## Features
16 | 
17 | The following modules are included in this package:
18 | 
19 | Harpy.CodeGenMonad: This module defines the code generator monad,
20 |   which is a combined state/reader/exception monad.  It contains
21 |   all the necessary details for allocating and managing code buffers.
22 | 
23 | Harpy.X86CodeGen: This module contains all the functions for generating
24 |   native x86 machine code.  The functions are very simple, and it is
25 |   necessary to specify all addressing modes etc. when emitting an
26 |   instruction.
27 | 
28 | Harpy.X86Assembler: A type class based layer on top of X86CodeGen
29 |   which determines the addressing modes from the types of the
30 |   operands.
31 | 
32 | Harpy.X86CGCombinators: Code generation combinators for conditionals,
33 |   loops, function entry/exit code etc.
34 | 
35 | Harpy.X86Disassembler: A disassembler for x86 machine code.
36 | 
37 | Harpy.Call: Exports functions for invoking the generated code.
38 | 
39 | ## Notes about the implementation
40 | 
41 | ### X86CodeGen.lhs
42 | 
43 | The file X86CodeGen.lhs is based on a header file called x86-codegen.h
44 | from the Mono distribution, which defines macros for emitting x86
45 | machine code directly into a memory buffer.  The Haskell module is a
46 | nearly one-to-one mapping from the original macros to Haskell
47 | functions.  The main differences are:
48 | 
49 | - Instead of emitting the data directly into a buffer, it uses the
50 |   CodeGen monad from file CodeGenMonad.lhs.
51 | 
52 | - The functions are strongly typed.
53 | 
54 | Several things should be kept in mind when using this file:
55 | 
56 | - Buffer overflow checks have to be done manually with checkBufferSize or
57 |   ensureBufferSize
58 | 
59 | - MMX, SSE, SSE2 and SSE3 instructions and registers are not supported.
60 | 
61 | - 64-bit mode is not supported.
62 | 
63 | - The disassembler supports (in principle) 64-bit mode and SSE
64 |   instructions, but this has not been tested.
65 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | all: larger-tutorial.pdf
 2 | 
 3 | larger-tutorial.pdf: larger-tutorial.tex 
 4 | 
 5 | larger-tutorial.tex: larger-tutorial.lhs
 6 | 	lhs2TeX $< > $@
 7 | 
 8 | %.pdf: %.tex
 9 | 	pdflatex $<
10 | 	pdflatex $<
11 | 
12 | clean:
13 | 	rm -f larger-tutorial.aux larger-tutorial.toc\
14 |  larger-tutorial.tex larger-tutorial.log larger-tutorial.out\
15 |  larger-tutorial.ptb
16 | 
17 | really-clean: clean
18 | 	rm -f larger-tutorial.pdf
19 | 


--------------------------------------------------------------------------------
/doc/larger-tutorial.lhs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mgrabmueller/harpy/a4844756a0ea84c545b00dc5141a88d0e069cba5/doc/larger-tutorial.lhs


--------------------------------------------------------------------------------
/doc/tutorial.lhs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mgrabmueller/harpy/a4844756a0ea84c545b00dc5141a88d0e069cba5/doc/tutorial.lhs


--------------------------------------------------------------------------------
/examples/evaluator/ArithParser.hs:
--------------------------------------------------------------------------------
 1 | module ArithParser where
 2 | 
 3 | import Control.Monad
 4 | 
 5 | import ArithTypes
 6 | 
 7 | import Foreign
 8 | 
 9 | import Text.ParserCombinators.Parsec
10 | import qualified Text.ParserCombinators.Parsec.Token as P
11 | import Text.ParserCombinators.Parsec.Language
12 | import Text.ParserCombinators.Parsec.Expr
13 | 
14 | lexer :: P.TokenParser ()
15 | lexer  = P.makeTokenParser
16 |          (haskellStyle
17 |          { reservedOpNames = ["*","/","+","-"]
18 |          })
19 | 
20 | statement :: Parser Stmt
21 | statement = do s <- statement'
22 |                eof
23 |                return s
24 | 
25 | statement' :: Parser Stmt
26 | statement' = try((do [i] <- identifier
27 |                      if i < 'a' || i > 'z'
28 |                        then fail "character a-z expected"
29 |                        else return ()
30 |                      symbol ":="
31 |                      e <- expr
32 |                      return $ Assign i e) <?> "assignment")
33 |             <|> liftM Cmd cmd
34 |             <|> liftM Print expr
35 | 
36 | cmd :: Parser Cmd
37 | cmd = try (do symbol ":help"
38 |               return Help)
39 |       <|> try (do symbol ":verbose"
40 |                   return Verbose)
41 |       <|> (do symbol ":quit"
42 |               return Quit)
43 | 
44 | expr    :: Parser Exp
45 | expr    = buildExpressionParser table factor
46 |         <?> "expression"
47 | 
48 | table   = [[op "*" Mul AssocLeft, op "/" Div AssocLeft]
49 |           ,[op "+" Add AssocLeft, op "-" Sub AssocLeft]
50 |           ]
51 |         where
52 |           op s f assoc
53 |              = Infix (do{ reservedOp s; return f} <?> "operator") assoc
54 | 
55 | factor  =   parens expr
56 |         <|> liftM (Lit . fromInteger) natural
57 |         <|> (do [i] <- identifier
58 |                 if i < 'a' || i > 'z'
59 |                    then fail "character a-z expected"
60 |                    else return ()
61 |                 return $ Var i)
62 |         <?> "simple expression"
63 | 
64 | whiteSpace= P.whiteSpace lexer
65 | lexeme    = P.lexeme lexer
66 | symbol    = P.symbol lexer
67 | natural   = P.natural lexer
68 | parens    = P.parens lexer
69 | semi      = P.semi lexer
70 | identifier= P.identifier lexer
71 | reserved  = P.reserved lexer
72 | reservedOp= P.reservedOp lexer
73 | 
74 | 


--------------------------------------------------------------------------------
/examples/evaluator/ArithTypes.hs:
--------------------------------------------------------------------------------
 1 | module ArithTypes where
 2 | 
 3 | import Foreign
 4 | 
 5 | data Stmt = Assign Char Exp
 6 |           | Print Exp
 7 |           | Cmd Cmd
 8 |           deriving (Show)
 9 | 
10 | data Cmd = Help
11 |          | Quit
12 |          | Verbose
13 |          deriving (Show)
14 | 
15 | data Exp = Add Exp Exp
16 |          | Sub Exp Exp
17 |          | Mul Exp Exp
18 |          | Div Exp Exp
19 |          | Lit Int32
20 |          | Var Char
21 |          deriving (Show)
22 | 
23 | 


--------------------------------------------------------------------------------
/examples/evaluator/Evaluator.hs:
--------------------------------------------------------------------------------
  1 | module Main(main) where
  2 | 
  3 | import ArithTypes
  4 | import ArithParser
  5 | 
  6 | import Harpy
  7 | import Harpy.X86Disassembler
  8 | 
  9 | import Foreign
 10 | 
 11 | import Control.Monad
 12 | 
 13 | import System.Console.Readline
 14 | 
 15 | import Text.ParserCombinators.Parsec
 16 | 
 17 | $(callDecl "callAsWord32" [t|Word32 -> IO Word32|])
 18 | 
 19 | main :: IO ()
 20 | main = do putStrLn "\n\n\n\nHarpy Interpreter"
 21 |           putStrLn "(type :help to see a help message)"
 22 |           allocaArray 26 (\ p -> mapM_ (\ i -> poke (advancePtr p i) 0) [0..25] >> repl p False)
 23 | 
 24 | repl :: Ptr Int32 -> Bool -> IO ()
 25 | repl env verbose =
 26 |     do s <- readline "@ "
 27 |        case s of
 28 |          Nothing -> return ()
 29 |          Just s' -> do addHistory s'
 30 |                        interpret env verbose s'
 31 | 
 32 | interpret :: Ptr Int32 -> Bool -> String -> IO ()
 33 | interpret env verbose s =
 34 |     do let e = parse statement "<standard input>" s
 35 |        case e of
 36 |          Left err -> do putStrLn (show err)
 37 |                         repl env verbose
 38 |          Right stmt -> run env verbose stmt
 39 | 
 40 | run :: Ptr Int32 -> Bool -> Stmt -> IO ()
 41 | run env verbose (Cmd Help) =
 42 |     do putStrLn "Enter an arithmetic expression to evaluate it"
 43 |        putStrLn "  e.g. 5 / 2"
 44 |        putStrLn "Enter an assignment to set a variable"
 45 |        putStrLn "  e.g. a := 4 * 2 - (6 + 1)"
 46 |        putStrLn "Enter :help to see this message again"
 47 |        putStrLn "Enter :quit to exit"
 48 |        putStrLn "Enter :verbose to toggle disassembly output"
 49 |        repl env verbose
 50 | 
 51 | run env _ (Cmd Quit) = return ()
 52 | 
 53 | run env verbose (Cmd Verbose) = repl env (Prelude.not verbose)
 54 | 
 55 | run env verbose stmt@(Assign var exp) =
 56 |     do (i, ins) <- eval' env stmt
 57 |        when verbose (mapM_ (putStrLn . showIntel) ins)
 58 |        repl env verbose
 59 | 
 60 | run env verbose stmt@(Print exp) =
 61 |     do (i, ins) <- eval' env stmt
 62 |        putStrLn (show i)
 63 |        when verbose (mapM_ (putStrLn . showIntel) ins)
 64 |        repl env verbose
 65 | 
 66 | -- Function for compiling and executing statements.
 67 | eval' :: Ptr Int32 -> Stmt -> IO (Int32, [Instruction])
 68 | eval' env e = do (_, Right v) <- runCodeGen (compileAndRun e) env ()
 69 |                  return v
 70 | 
 71 | compileAndRun :: Stmt -> CodeGen (Ptr Int32) s (Int32, [Instruction])
 72 | compileAndRun (Assign c exp) =
 73 |     do entryCode
 74 |        compileExp exp
 75 |        env <- getEnv
 76 |        mov (variableAddress env c) eax
 77 |        exitCode
 78 |        d <- disassemble
 79 |        callAsVoid 0
 80 |        return (0, d)
 81 | compileAndRun (Print exp) =
 82 |     do entryCode
 83 |        compileExp exp
 84 |        exitCode
 85 |        d <- disassemble
 86 |        r <- callAsWord32 0
 87 |        return (fromIntegral r, d)
 88 | 
 89 | compileExp :: Exp -> CodeGen (Ptr Int32) s ()
 90 | compileExp (Add e1 e2) = compileBinOp e1 e2 (add eax (Ind esp))
 91 | compileExp (Sub e1 e2) = compileBinOp e1 e2 (sub eax (Ind esp))
 92 | compileExp (Mul e1 e2) = compileBinOp e1 e2 (imul InPlace eax (Ind esp))
 93 | compileExp (Div e1 e2) = compileBinOp e1 e2 (cdq >> idiv (Ind esp))
 94 | compileExp (Lit i) = mov eax ((fromIntegral i) :: Word32)
 95 | compileExp (Var c) = do env <- getEnv
 96 |                         mov eax (variableAddress env c)
 97 | 
 98 | compileBinOp :: Exp -> Exp -> CodeGen (Ptr Int32) s a -> CodeGen (Ptr Int32) s ()
 99 | compileBinOp e1 e2 op = do compileExp e2
100 |                            push eax
101 |                            compileExp e1
102 |                            op
103 |                            add esp (4 :: Word32)
104 | 
105 | entryCode :: CodeGen e s ()
106 | entryCode = do push ebp
107 |                mov ebp esp
108 | 
109 | exitCode :: CodeGen e s ()
110 | exitCode = do mov esp ebp
111 |               pop ebp
112 |               ret
113 | 
114 | variableAddress :: Ptr Int32 -> Char -> Addr
115 | variableAddress env c =
116 |     let ofs = fromEnum c - fromEnum 'a'
117 |         env' = advancePtr env ofs
118 |     in Addr (fromIntegral (ptrToWordPtr env'))
119 | 


--------------------------------------------------------------------------------
/harpy.cabal:
--------------------------------------------------------------------------------
 1 | Cabal-version:  >=1.6
 2 | Build-type: Simple
 3 | Name:           harpy
 4 | Version:        0.6.0.4
 5 | License:        BSD3
 6 | License-file:   LICENSE
 7 | Author:         Dirk Kleeblatt <dirk.kleeblatt@gmx.de>
 8 |                 Martin Grabmueller <martin@grabmueller.de>
 9 | Maintainer:     martin@grabmueller.de
10 | Category:       Code Generation
11 | Synopsis:       Runtime code generation for x86 machine code
12 | Description:    The package contains the following components:
13 |     .
14 |     * An x86 assembler.  We provide both low-level code generation in
15 |       module "Harpy.X86CodeGen" as well as a (slightly) higher-level
16 |       implementation in module "Harpy.X86Assembler", which figures out
17 |       addressing modes based on an instruction's operand types.
18 |     .
19 |     * An x86 disassembler which knows most of the opcodes available on
20 |       modern x86 processors and can display its output both in the style
21 |       used in Intel documents an in AT&T style, like the GNU tools. The
22 |       disassembler can be found in module "Harpy.X86Disassembler".  The
23 |           disassembler is re-exported from the disassembler package for
24 |           compatibility with earlier Harpy releases.
25 |     .
26 |     * Some abstractions over the abovementioned code generation modules,
27 |       such as automatic label management and code generation
28 |       combinators (for if-then-else statements, while-loops, functions)
29 |       (module "Harpy.X86CGCombinators").
30 |     .
31 |     * All the above modules use the code generation monad defined in module
32 |       "Harpy.CodeGenMonad".
33 | homepage:            https://github.com/mgrabmueller/harpy
34 | bug-reports:         https://github.com/mgrabmueller/harpy/issues
35 | Stability:      Experimental
36 | 
37 | Extra-source-files:
38 |     NEWS README.md Makefile ChangeLog.md
39 |     doc/Makefile doc/tutorial.lhs doc/larger-tutorial.lhs
40 |     examples/evaluator/ArithTypes.hs examples/evaluator/ArithParser.hs
41 |     examples/evaluator/Evaluator.hs
42 | 
43 | source-repository head
44 |   type:     git
45 |   location: https://github.com/mgrabmueller/harpy
46 | 
47 | Library
48 |   Build-depends:
49 |     base >= 4 && < 5,
50 |     parsec >= 1 && < 4,
51 |     mtl >= 1 && < 3,
52 |     template-haskell >= 2 && < 3,
53 |     pretty >= 1 && < 2,
54 |     containers >= 0.3 && < 1,
55 |     array >= 0.3 && < 1,
56 |     disassembler >= 0.2.0.0
57 |   Exposed-Modules:
58 |     Harpy,
59 |     Harpy.X86CodeGen,
60 |     Harpy.X86Assembler,
61 |     Harpy.CodeGenMonad,
62 |     Harpy.Call,
63 |     Harpy.X86Disassembler,
64 |     Harpy.X86CGCombinators
65 |   Extensions:
66 |     ForeignFunctionInterface, MultiParamTypeClasses,
67 |     TemplateHaskell, CPP, FlexibleContexts, FlexibleInstances,
68 |     RankNTypes
69 | 
70 | 


--------------------------------------------------------------------------------