├── .gitignore ├── LICENSE ├── README.md ├── Setup.hs ├── cabal.project ├── data-bitcode-llvm.cabal ├── default.nix ├── package.yaml ├── src └── Data │ └── BitCode │ ├── FloatCast.hs │ ├── LLVM.hs │ └── LLVM │ ├── CallingConv.hs │ ├── Classes │ ├── HasType.hs │ └── ToSymbols.hs │ ├── Cmp.hs │ ├── Codes │ ├── AtomicOrdering.hs │ ├── Attribute.hs │ ├── AttributeKind.hs │ ├── ComdatSelectionKind.hs │ ├── Constants.hs │ ├── Function.hs │ ├── FunctionSummarySymtab.hs │ ├── Identification.hs │ ├── Metadata.hs │ ├── Module.hs │ ├── ModulePathSymtab.hs │ ├── OperandBundleTag.hs │ ├── SynchronizationScope.hs │ ├── Type.hs │ ├── TypeSymtab.hs │ ├── UseList.hs │ └── ValueSymtab.hs │ ├── Flags.hs │ ├── FromBitCode.hs │ ├── Function.hs │ ├── IDs │ └── Blocks.hs │ ├── Instruction.hs │ ├── Linkage.hs │ ├── Metadata.hs │ ├── Opcodes │ ├── Binary.hs │ └── Cast.hs │ ├── ParamAttr.hs │ ├── Pretty.hs │ ├── RMWOperations.hs │ ├── Reader │ └── Monad.hs │ ├── StorageClass.hs │ ├── ThreadLocalMode.hs │ ├── ToBitCode.hs │ ├── Type.hs │ ├── Types.hs │ ├── Util.hs │ ├── Value.hs │ └── Visibility.hs ├── stack.yaml └── test ├── LLVMSpec.hs ├── Tasty.hs └── fromBitcode ├── atomicload.ll ├── atomicrmw.ll ├── atomicstore.ll ├── cmpxchg.ll ├── extractvalue.ll ├── fence.ll ├── memset.ll └── switch.ll /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-* 3 | cabal-dev 4 | *.o 5 | *.hi 6 | *.chi 7 | *.chs.h 8 | *.dyn_o 9 | *.dyn_hi 10 | .hpc 11 | .hsenv 12 | .cabal-sandbox/ 13 | cabal.sandbox.config 14 | *.prof 15 | *.aux 16 | *.hp 17 | *.eventlog 18 | .stack-work/ 19 | cabal.project.local 20 | test/**/*.bc 21 | test/**/*.dis 22 | GNUmakefile 23 | ghc.mk 24 | .ghc.environment.* 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Moritz Angermann (c) 2016 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Moritz Angermann nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data.BitCode.LLVM 2 | 3 | A module to produce bitcode from llvm module descriptions. 4 | 5 | # TODO 6 | 7 | ## High prio 8 | - [ ] Extend the call instruction to carry the symbol, signature 9 | and calling conv, the call is performed with, and verify 10 | that the symbols type matches the signature; and calling conv 11 | if symbol is a Function Symbol. If Symbol is a reference 12 | ensure that the signature matches. (Data.BitCode.LLVM should 13 | automatically insert the required ptr cast if the symbol does 14 | not match). 15 | Also error if neither a Reference with matching signature nor 16 | a Function. I don't think calling anything else makes any 17 | sense? 18 | - [ ] Drop the stupid Type first item from the instruction records. 19 | This should be computed using `instTy`. 20 | - [ ] Make `instTy` work for `getElementPointer`. 21 | - [ ] Write tests to verify that `instTy` works as expected. 22 | - [ ] Verify, e.g. in `mkInstRec` for `Call` (e.g. everywhere where we 23 | do not use `instTy` that the type we encode matches the one 24 | we should expect. 25 | - [ ] Fix getValue. We currently *assume* that we have no fwd references. 26 | And hence can always load the value, but llvm does not work that 27 | way for fwd references. There the value and type are encoded after 28 | each other (ValueId, TypeId). If it's non fwd reference, it's 29 | just ValueId. (see getValueTypePair in BitCodeReader). Similarly this 30 | is required to support fwd references in ToBitCode. 31 | - [x] Collapse constants. 32 | 33 | ## General 34 | - [ ] Stop handrolling your monads. (Use transformers, and derive!) 35 | - [x] Better error reporting (e.g. see preliminaries in `Data.BitCode.LLVM.Util`) 36 | Maybe using `ExceptT` to give good hints as to what faild? 37 | - [ ] More type verification. (E.g. let's try to make sure you 38 | just can not construct invalid code) 39 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: *.cabal ../data-bitcode/*.cabal 2 | -------------------------------------------------------------------------------- /data-bitcode-llvm.cabal: -------------------------------------------------------------------------------- 1 | -- This file has been generated from package.yaml by hpack version 0.14.1. 2 | -- 3 | -- see: https://github.com/sol/hpack 4 | 5 | name: data-bitcode-llvm 6 | version: 3.8.0.0 7 | synopsis: llvm bitcode reader and writer 8 | category: Code Generation 9 | homepage: https://github.com/angerman/data-llvm-bitcode#readme 10 | bug-reports: https://github.com/angerman/data-llvm-bitcode/issues 11 | author: Moritz Angermann 12 | maintainer: Moritz Angermann 13 | copyright: lichtzwerge GmbH 14 | license: BSD3 15 | license-file: LICENSE 16 | build-type: Simple 17 | cabal-version: >= 1.10 18 | 19 | source-repository head 20 | type: git 21 | location: https://github.com/angerman/data-llvm-bitcode 22 | 23 | library 24 | hs-source-dirs: 25 | src 26 | build-depends: 27 | base >= 4.7 && < 5 28 | , containers 29 | , pretty 30 | , data-bitcode 31 | , binary 32 | , array 33 | exposed-modules: 34 | Data.BitCode.FloatCast 35 | Data.BitCode.LLVM 36 | Data.BitCode.LLVM.CallingConv 37 | Data.BitCode.LLVM.Classes.HasType 38 | Data.BitCode.LLVM.Classes.ToSymbols 39 | Data.BitCode.LLVM.Cmp 40 | Data.BitCode.LLVM.Codes.AtomicOrdering 41 | Data.BitCode.LLVM.Codes.Attribute 42 | Data.BitCode.LLVM.Codes.AttributeKind 43 | Data.BitCode.LLVM.Codes.ComdatSelectionKind 44 | Data.BitCode.LLVM.Codes.Constants 45 | Data.BitCode.LLVM.Codes.Function 46 | Data.BitCode.LLVM.Codes.FunctionSummarySymtab 47 | Data.BitCode.LLVM.Codes.Identification 48 | Data.BitCode.LLVM.Codes.Metadata 49 | Data.BitCode.LLVM.Codes.Module 50 | Data.BitCode.LLVM.Codes.ModulePathSymtab 51 | Data.BitCode.LLVM.Codes.OperandBundleTag 52 | Data.BitCode.LLVM.Codes.SynchronizationScope 53 | Data.BitCode.LLVM.Codes.Type 54 | Data.BitCode.LLVM.Codes.TypeSymtab 55 | Data.BitCode.LLVM.Codes.UseList 56 | Data.BitCode.LLVM.Codes.ValueSymtab 57 | Data.BitCode.LLVM.Flags 58 | Data.BitCode.LLVM.FromBitCode 59 | Data.BitCode.LLVM.Function 60 | Data.BitCode.LLVM.IDs.Blocks 61 | Data.BitCode.LLVM.Instruction 62 | Data.BitCode.LLVM.Linkage 63 | Data.BitCode.LLVM.Metadata 64 | Data.BitCode.LLVM.Opcodes.Binary 65 | Data.BitCode.LLVM.Opcodes.Cast 66 | Data.BitCode.LLVM.ParamAttr 67 | Data.BitCode.LLVM.Pretty 68 | Data.BitCode.LLVM.Reader.Monad 69 | Data.BitCode.LLVM.RMWOperations 70 | Data.BitCode.LLVM.StorageClass 71 | Data.BitCode.LLVM.ThreadLocalMode 72 | Data.BitCode.LLVM.ToBitCode 73 | Data.BitCode.LLVM.Type 74 | Data.BitCode.LLVM.Types 75 | Data.BitCode.LLVM.Util 76 | Data.BitCode.LLVM.Value 77 | Data.BitCode.LLVM.Visibility 78 | default-language: Haskell2010 79 | 80 | test-suite spec 81 | main-is: Tasty.hs 82 | hs-source-dirs: test 83 | ghc-options: -Wall -threaded 84 | type: exitcode-stdio-1.0 85 | build-depends: base 86 | , tasty 87 | , tasty-discover 88 | , tasty-hspec 89 | , tasty-quickcheck 90 | , process 91 | , filepath 92 | , data-bitcode 93 | , data-bitcode-llvm 94 | , containers 95 | default-language: Haskell2010 96 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | { mkDerivation, base, binary, data-bitcode, pretty, stdenv }: 2 | mkDerivation { 3 | pname = "data-bitcode-llvm"; 4 | version = "3.8.0.0"; 5 | src = ./.; 6 | libraryHaskellDepends = [ base binary data-bitcode pretty ]; 7 | homepage = "https://github.com/angerman/data-llvm-bitcode#readme"; 8 | description = "llvm bitcode reader and writer"; 9 | license = stdenv.lib.licenses.bsd3; 10 | } 11 | -------------------------------------------------------------------------------- /package.yaml: -------------------------------------------------------------------------------- 1 | name: data-bitcode-llvm 2 | version: 3.8.0.0 3 | synopsis: llvm bitcode reader and writer 4 | license: BSD3 5 | author: Moritz Angermann 6 | maintainer: Moritz Angermann 7 | copyright: lichtzwerge GmbH 8 | category: Code Generation 9 | 10 | github: angerman/data-llvm-bitcode 11 | 12 | dependencies: 13 | - base >= 4.7 && < 5 14 | - pretty >= 1.1 15 | - data-bitcode >= 0.1 16 | - binary >= 0.8 17 | 18 | library: 19 | source-dirs: src 20 | -------------------------------------------------------------------------------- /src/Data/BitCode/FloatCast.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | 3 | -- | This module was written based on 4 | -- . 5 | -- 6 | -- Implements casting via a 1-elemnt STUArray, as described in 7 | -- . 8 | -- 9 | -- copied from binary, as binary does not expose it. 10 | 11 | module Data.BitCode.FloatCast 12 | ( floatToWord 13 | , wordToFloat 14 | , doubleToWord 15 | , wordToDouble 16 | ) where 17 | 18 | import Data.Word (Word32, Word64) 19 | import Data.Array.ST (newArray, readArray, MArray, STUArray) 20 | import Data.Array.Unsafe (castSTUArray) 21 | import GHC.ST (runST, ST) 22 | 23 | -- | Reinterpret-casts a `Float` to a `Word32`. 24 | floatToWord :: Float -> Word32 25 | floatToWord x = runST (cast x) 26 | {-# INLINE floatToWord #-} 27 | 28 | -- | Reinterpret-casts a `Word32` to a `Float`. 29 | wordToFloat :: Word32 -> Float 30 | wordToFloat x = runST (cast x) 31 | {-# INLINE wordToFloat #-} 32 | 33 | -- | Reinterpret-casts a `Double` to a `Word64`. 34 | doubleToWord :: Double -> Word64 35 | doubleToWord x = runST (cast x) 36 | {-# INLINE doubleToWord #-} 37 | 38 | -- | Reinterpret-casts a `Word64` to a `Double`. 39 | wordToDouble :: Word64 -> Double 40 | wordToDouble x = runST (cast x) 41 | {-# INLINE wordToDouble #-} 42 | 43 | cast :: (MArray (STUArray s) a (ST s), 44 | MArray (STUArray s) b (ST s)) => a -> ST s b 45 | cast x = newArray (0 :: Int, 0) x >>= castSTUArray >>= flip readArray 0 46 | {-# INLINE cast #-} 47 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RecordWildCards #-} 2 | {-# LANGUAGE FlexibleInstances #-} 3 | {-# LANGUAGE TypeSynonymInstances #-} 4 | {-# LANGUAGE DeriveGeneric #-} 5 | module Data.BitCode.LLVM where 6 | 7 | import Data.BitCode.LLVM.ParamAttr (GroupIdx, ParamAttrGroupEntry) 8 | import Data.BitCode.LLVM.Type (Ty) 9 | import Data.BitCode.LLVM.Value (ValueSymbolTable) 10 | import Data.BitCode.LLVM.Function (Function) 11 | import Data.BitCode.LLVM.Value (Symbol, Value, symbolValue) 12 | import Data.BitCode.LLVM.Codes.Identification (Epoch) 13 | 14 | import qualified Data.BitCode.LLVM.Instruction as I 15 | import qualified Data.BitCode.LLVM.Value as V 16 | 17 | import Data.BitCode.LLVM.Classes.ToSymbols 18 | import Data.BitCode.LLVM.Classes.HasType 19 | 20 | import Data.Word (Word64) 21 | 22 | import GHC.Generics (Generic) 23 | import Data.Binary (Binary) 24 | import Data.Set (Set) 25 | 26 | --- LLVM Bit Codes ------------------------------------------------------------- 27 | -- see LLVMBitCodes.h (e.g. http://llvm.org/docs/doxygen/html/LLVMBitCodes_8h_source.html) 28 | -- 29 | -- Emacs Query Replace: 30 | -- regexp: ^[[:digit:]]+[[:space:]]+\([[:alpha:]_]+\)[[:space:]]*=[[:space:]]*\([[:digit:]]+\)[,[:space:]]*//[[:space:]]*\(.+\) 31 | -- with: -- | \3^J | \1 -- \2 32 | -- ^J: C-q C-j 33 | -- 34 | -- if you did not copy off the website, you won't need the [[:digit:]]+ in front. 35 | -- some fixing by hand is still required. As some comments span multiple lines. 36 | -- 37 | 38 | -- In general, the BlockIDs can be found in LLVM.IDs 39 | -- the coresponding Record Codes in LLVM.Codes.XYZ 40 | 41 | 42 | -- | The Identifier for the writer of this bitcode file 43 | -- to aid in error reporting. 44 | data Ident = Ident 45 | String -- ^ The name of the producer. 46 | Epoch -- ^ The llvm bitcode epoch (version). Currently only 1 (Current) is supported. 47 | deriving (Show, Generic) 48 | 49 | -- | The representation of the actual module. 50 | data Module = Module 51 | { mVersion :: Word64 -- ^ Encoding version: 0 absolute indices, 1 relative indices. 52 | , mTriple :: Maybe String -- ^ Optional triple: usually -- 53 | , mDatalayout :: Maybe String-- ^ Optional data layout string. 54 | , mValues :: [Symbol] -- ^ Globals. (Global values and constants.) 55 | -- NOTE: these are *not* used during generation. 56 | -- Only the mValues are used. 57 | , mDecls :: [Symbol] -- ^ Function declarations for functions outside of the module. 58 | , mDefns :: [Symbol] -- ^ Functions defined (bodies are in mFns) 59 | , mFns :: [Function] -- ^ Function definitions for function contained within the module. 60 | -- NOTE: while we could compute these from 61 | -- the existing values, doing so is 62 | -- rather expensive. And the constructor 63 | -- might be able to compute these directrly. 64 | , mConsts :: [Symbol] 65 | , mTypes :: [Ty] -- ^ sorted type list (in construction order) 66 | } 67 | deriving (Show, Eq, Generic) 68 | 69 | instance ToSymbols Module where 70 | symbols (Module{..}) = mValues ++ mDecls ++ concatMap symbols mFns 71 | 72 | instance HasType Symbol where 73 | ty (V.Named _ _ t _) = t 74 | ty (V.Unnamed _ t _) = t 75 | ty (V.Lazy _ t _) = t 76 | -- instance Binary Ident 77 | -- instance Binary Module 78 | 79 | -- TODO: when actually constructing a module, we might 80 | -- want a different data structure, which implicitly 81 | -- creates the type table, and replaces types with 82 | -- their respective indices. 83 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/CallingConv.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.CallingConv where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- TODO: Add doc, see @include/llvm/IR/CallingConv.h@ 8 | data CallingConv 9 | = C -- 0 10 | | CC_UNUSED1 | CC_UNUSED2 | CC_UNUSED3 | CC_UNUSED4 | CC_UNUSED5 | CC_UNUSED6 | CC_UNUSED7 11 | | Fast -- 8 12 | | Cold -- 9 13 | | GHC -- 10 14 | | HiPE -- 11 15 | | WebKit_JS -- 12 16 | | AnyReg -- 13 17 | | PreserveMost -- 14 18 | | PreserveAll -- 15 19 | | Swift -- 16 20 | | CxxFastTls -- 17 21 | -- NOTE: There are more (64...) 22 | deriving (Eq, Enum, Ord, Show, Generic) 23 | 24 | instance Binary CallingConv 25 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Classes/HasType.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Classes.HasType where 2 | 3 | import Data.BitCode.LLVM.Type 4 | 5 | class HasType a where 6 | ty :: a -> Ty 7 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Classes/ToSymbols.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fprof-auto #-} 2 | {-# LANGUAGE RecordWildCards #-} 3 | {-# LANGUAGE TypeSynonymInstances #-} 4 | {-# LANGUAGE FlexibleInstances #-} 5 | module Data.BitCode.LLVM.Classes.ToSymbols 6 | (ToSymbols(..)) where 7 | 8 | import Data.BitCode.LLVM.Value as V 9 | import Data.BitCode.LLVM.Function as F 10 | import Data.BitCode.LLVM.Instruction as I 11 | import Data.Maybe (catMaybes) 12 | import Data.Foldable (foldl') 13 | 14 | -- I guess this should just be Foldable or Traversable. 15 | 16 | -- | Return the symbols of a given data type. 17 | class ToSymbols a where 18 | symbols :: a -> [Symbol] 19 | fsymbols :: [Symbol] -> a -> [Symbol] 20 | 21 | instance (ToSymbols a) => ToSymbols [a] where 22 | symbols = concatMap symbols 23 | fsymbols s_ = foldl' fsymbols s_ 24 | 25 | instance ToSymbols Value where 26 | symbols V.Global{..} | Just s <- gInit = symbols s 27 | symbols V.Function{..} = concatMap symbols $ catMaybes [fePrologueData fExtra, fePrefixData fExtra] 28 | symbols V.Alias{..} = symbols aVal 29 | symbols V.Constant{..} = symbols cConst 30 | symbols _ = [] 31 | 32 | fsymbols s_ V.Global{..} | Just s <- gInit = fsymbols s_ s 33 | fsymbols s_ V.Function{..} = foldl' fsymbols s_ (catMaybes [fePrologueData fExtra, fePrefixData fExtra]) 34 | fsymbols s_ V.Alias{..} = fsymbols s_ aVal 35 | fsymbols s_ V.Constant{..} = fsymbols s_ cConst 36 | fsymbols s_ _ = s_ 37 | 38 | instance ToSymbols Const where 39 | symbols (V.Array ss) = concatMap symbols ss 40 | symbols (V.Vector ss) = concatMap symbols ss 41 | symbols (V.Struct ss) = concatMap symbols ss 42 | symbols (V.BinOp _ s s') = symbols s ++ symbols s' 43 | symbols (V.Cast _ _ s) = symbols s 44 | symbols (V.InboundsGep _ ss) = concatMap symbols ss 45 | symbols _ = [] 46 | fsymbols s_ (V.Array ss) = foldl' fsymbols s_ ss 47 | fsymbols s_ (V.Vector ss) = foldl' fsymbols s_ ss 48 | fsymbols s_ (V.Struct ss) = foldl' fsymbols s_ ss 49 | fsymbols s_ (V.BinOp _ s s') = foldl' fsymbols s_ [s, s'] 50 | fsymbols s_ (V.Cast _ _ s) = foldl' fsymbols s_ [s] 51 | fsymbols s_ (V.InboundsGep _ ss) = foldl' fsymbols s_ ss 52 | fsymbols s_ _ = s_ 53 | 54 | 55 | instance ToSymbols BlockInst where 56 | symbols (Just s, i) = s:symbols i 57 | symbols (Nothing, i) = symbols i 58 | fsymbols s_ (Just s, i) = fsymbols (fsymbols s_ s) i 59 | fsymbols s_ (Nothing, i) = fsymbols s_ i 60 | 61 | instance ToSymbols BasicBlock where 62 | symbols (BasicBlock insts) = concatMap symbols insts 63 | fsymbols s_ (BasicBlock insts) = foldl' fsymbols s_ insts 64 | 65 | instance ToSymbols Function where 66 | -- TODO: do we want to apply symbols on the result instead of only to const? 67 | symbols (F.Function sig const body) = symbols sig ++ concatMap symbols const ++ concatMap symbols body 68 | fsymbols s_ (F.Function sig const body) = foldl' fsymbols (foldl' fsymbols s_ (sig:const)) body 69 | 70 | instance ToSymbols Symbol where 71 | symbols s = s:symbols (symbolValue s) 72 | fsymbols s_ s | s `elem` s_ = s_ 73 | | otherwise = fsymbols (s:s_) (symbolValue s) 74 | 75 | instance ToSymbols Inst where 76 | symbols (I.Alloca _ s _) = [s] 77 | symbols (I.Cast _ _ s) = [s] 78 | symbols (I.Load _ s _) = [s] 79 | symbols (I.Store s s' _) = [s,s'] 80 | symbols (I.Call _ _ _ s _ ss) = s:ss 81 | symbols (I.Cmp2 _ s s' _) = [s,s'] 82 | symbols (I.Gep _ _ s ss) = s:ss 83 | symbols (I.ExtractValue s _) = [s] 84 | symbols (I.Ret (Just s)) = [s] 85 | symbols (I.Ret Nothing) = [] 86 | symbols (I.UBr _) = [] 87 | symbols (I.Br s _ _) = [s] 88 | symbols (I.BinOp _ _ l r _) = [l, r] 89 | symbols (I.Switch s _ sbs) = s:map fst sbs 90 | symbols (I.CmpXchg s s' s'' _ _ _) = [s,s',s''] 91 | symbols (I.Fence _ _) = [] 92 | symbols (I.AtomicRMW s s' _ _ _) = [s,s'] 93 | symbols (I.AtomicStore s s' _ _ _) = [s, s'] 94 | symbols (I.AtomicLoad _ s _ _ _) = [s] 95 | 96 | fsymbols s_ (I.Alloca _ s _) = fsymbols s_ s 97 | fsymbols s_ (I.Cast _ _ s) = fsymbols s_ s 98 | fsymbols s_ (I.Load _ s _) = fsymbols s_ s 99 | fsymbols s_ (I.Store s s' _) = foldl' fsymbols s_ [s,s'] 100 | fsymbols s_ (I.Call _ _ _ s _ ss) = foldl' fsymbols s_ (s:ss) 101 | fsymbols s_ (I.Cmp2 _ s s' _) = foldl' fsymbols s_ [s,s'] 102 | fsymbols s_ (I.Gep _ _ s ss) = foldl' fsymbols s_ (s:ss) 103 | fsymbols s_ (I.ExtractValue s _) = fsymbols s_ s 104 | fsymbols s_ (I.Ret (Just s)) = fsymbols s_ s 105 | fsymbols s_ (I.Ret Nothing) = s_ 106 | fsymbols s_ (I.UBr _) = s_ 107 | fsymbols s_ (I.Br s _ _) = fsymbols s_ s 108 | fsymbols s_ (I.BinOp _ _ l r _) = foldl' fsymbols s_ [l, r] 109 | fsymbols s_ (I.Switch s _ sbs) = foldl' fsymbols s_ (s:map fst sbs) 110 | fsymbols s_ (I.CmpXchg p c n _ _ _) = foldl' fsymbols s_ [p, c, n] 111 | fsymbols s_ (I.Fence _ _) = s_ 112 | fsymbols s_ (I.AtomicRMW s s' _ _ _) = foldl' fsymbols s_ [s, s'] 113 | fsymbols s_ (I.AtomicStore s s' _ _ _) = foldl' fsymbols s_ [s, s'] 114 | fsymbols s_ (I.AtomicLoad _ s _ _ _) = fsymbols s_ s 115 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Cmp.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Cmp where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- | This enumeration lists the possible predicates for CmpInst subclasses. 8 | -- Values in the range 0-31 are reserved for FCmpInst, while values in the 9 | -- range 32-64 are reserved for ICmpInst. This is necessary to ensure the 10 | -- predicate values are not overlapping between the classes. 11 | data Predicate 12 | -- Opcode U L G E Intuitive operation 13 | = FCMP_FALSE -- = 0, ///< 0 0 0 0 Always false (always folded) 14 | | FCMP_OEQ -- = 1, ///< 0 0 0 1 True if ordered and equal 15 | | FCMP_OGT -- = 2, ///< 0 0 1 0 True if ordered and greater than 16 | | FCMP_OGE -- = 3, ///< 0 0 1 1 True if ordered and greater than or equal 17 | | FCMP_OLT -- = 4, ///< 0 1 0 0 True if ordered and less than 18 | | FCMP_OLE -- = 5, ///< 0 1 0 1 True if ordered and less than or equal 19 | | FCMP_ONE -- = 6, ///< 0 1 1 0 True if ordered and operands are unequal 20 | | FCMP_ORD -- = 7, ///< 0 1 1 1 True if ordered (no nans) 21 | | FCMP_UNO -- = 8, ///< 1 0 0 0 True if unordered: isnan(X) | isnan(Y) 22 | | FCMP_UEQ -- = 9, ///< 1 0 0 1 True if unordered or equal 23 | | FCMP_UGT -- = 10, ///< 1 0 1 0 True if unordered or greater than 24 | | FCMP_UGE -- = 11, ///< 1 0 1 1 True if unordered, greater than, or equal 25 | | FCMP_ULT -- = 12, ///< 1 1 0 0 True if unordered or less than 26 | | FCMP_ULE -- = 13, ///< 1 1 0 1 True if unordered, less than, or equal 27 | | FCMP_UNE -- = 14, ///< 1 1 1 0 True if unordered or not equal 28 | | FCMP_TRUE -- = 15, ///< 1 1 1 1 Always true (always folded) 29 | -- FIRST_FCMP_PREDICATE = FCMP_FALSE, 30 | -- LAST_FCMP_PREDICATE = FCMP_TRUE, 31 | -- BAD_FCMP_PREDICATE = FCMP_TRUE + 1, 32 | | SKIP_16 | SKIP_17 | SKIP_18 | SKIP_19 | SKIP_20 | SKIP_21 | SKIP_22 | SKIP_23 33 | | SKIP_24 | SKIP_25 | SKIP_26 | SKIP_27 | SKIP_28 | SKIP_29 | SKIP_30 | SKIP_31 34 | | ICMP_EQ -- = 32, ///< equal 35 | | ICMP_NE -- = 33, ///< not equal 36 | | ICMP_UGT -- = 34, ///< unsigned greater than 37 | | ICMP_UGE -- = 35, ///< unsigned greater or equal 38 | | ICMP_ULT -- = 36, ///< unsigned less than 39 | | ICMP_ULE -- = 37, ///< unsigned less or equal 40 | | ICMP_SGT -- = 38, ///< signed greater than 41 | | ICMP_SGE -- = 39, ///< signed greater or equal 42 | | ICMP_SLT -- = 40, ///< signed less than 43 | | ICMP_SLE -- = 41, ///< signed less or equal 44 | deriving (Eq, Enum, Show, Generic) 45 | -- FIRST_ICMP_PREDICATE = ICMP_EQ, 46 | -- LAST_ICMP_PREDICATE = ICMP_SLE, 47 | -- BAD_ICMP_PREDICATE = ICMP_SLE + 1 48 | 49 | instance Binary Predicate 50 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/AtomicOrdering.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Codes.AtomicOrdering where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- | Encoded AtomicOrdering values. 8 | data AtomicOrdering 9 | = ORDERING_NOTATOMIC -- 0 10 | | ORDERING_UNORDERED -- 1 11 | | ORDERING_MONOTONIC -- 2 12 | | ORDERING_ACQUIRE -- 3 13 | | ORDERING_RELEASE -- 4 14 | | ORDERING_ACQREL -- 5 15 | | ORDERING_SEQCST -- 6 16 | deriving (Show, Enum, Eq, Ord, Generic) 17 | 18 | instance Binary AtomicOrdering 19 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/Attribute.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.Attribute where 2 | 3 | -- | PARAMATTR blocks have code for defining a parameter attribute set. 4 | data AttributeCode 5 | = PARAMATTR_UNUSED0 -- 0 6 | -- | ENTRY: [paramidx0, attr0, paramidx1, attr1...]. WARN: Will be removed in 4.0 7 | | PARAMATTR_CODE_ENTRY_OLD -- 1 8 | -- | ENTRY: [attrgrp0, attrgrp1, ...] 9 | | PARAMATTR_CODE_ENTRY -- 2 10 | -- | ENTRY: [id, idx, attr0, att1, ...] 11 | | PARAMATTR_GRP_CODE_ENTRY -- 3 12 | deriving (Show, Enum) 13 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/AttributeKind.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.AttributeKind where 2 | 3 | data AttributeKind 4 | = ATTR_KIND_UNUSED0 -- 0 5 | | ATTR_KIND_ALIGNMENT -- 1 6 | | ATTR_KIND_ALWAYS_INLINE -- 2 7 | | ATTR_KIND_BY_VAL -- 3 8 | | ATTR_KIND_INLINE_HINT -- 4 9 | | ATTR_KIND_IN_REG -- 5 10 | | ATTR_KIND_MIN_SIZE -- 6 11 | | ATTR_KIND_NAKED -- 7 12 | | ATTR_KIND_NEST -- 8 13 | | ATTR_KIND_NO_ALIAS -- 9 14 | | ATTR_KIND_NO_BUILTIN -- 10 15 | | ATTR_KIND_NO_CAPTURE -- 11 16 | | ATTR_KIND_NO_DUPLICATE -- 12 17 | | ATTR_KIND_NO_IMPLICIT_FLOAT -- 13 18 | | ATTR_KIND_NO_INLINE -- 14 19 | | ATTR_KIND_NON_LAZY_BIND -- 15 20 | | ATTR_KIND_NO_RED_ZONE -- 16 21 | | ATTR_KIND_NO_RETURN -- 17 22 | | ATTR_KIND_NO_UNWIND -- 18 23 | | ATTR_KIND_OPTIMIZE_FOR_SIZE -- 19 24 | | ATTR_KIND_READ_NONE -- 20 25 | | ATTR_KIND_READ_ONLY -- 21 26 | | ATTR_KIND_RETURNED -- 22 27 | | ATTR_KIND_RETURNS_TWICE -- 23 28 | | ATTR_KIND_S_EXT -- 24 29 | | ATTR_KIND_STACK_ALIGNMENT -- 25 30 | | ATTR_KIND_STACK_PROTECT -- 26 31 | | ATTR_KIND_STACK_PROTECT_REQ -- 27 32 | | ATTR_KIND_STACK_PROTECT_STRONG -- 28 33 | | ATTR_KIND_STRUCT_RET -- 29 34 | | ATTR_KIND_SANITIZE_ADDRESS -- 30 35 | | ATTR_KIND_SANITIZE_THREAD -- 31 36 | | ATTR_KIND_SANITIZE_MEMORY -- 32 37 | | ATTR_KIND_UW_TABLE -- 33 38 | | ATTR_KIND_Z_EXT -- 34 39 | | ATTR_KIND_BUILTIN -- 35 40 | | ATTR_KIND_COLD -- 36 41 | | ATTR_KIND_OPTIMIZE_NONE -- 37 42 | | ATTR_KIND_IN_ALLOCA -- 38 43 | | ATTR_KIND_NON_NULL -- 39 44 | | ATTR_KIND_JUMP_TABLE -- 40 45 | | ATTR_KIND_DEREFERENCEABLE -- 41 46 | | ATTR_KIND_DEREFERENCEABLE_OR_NULL -- 42 47 | | ATTR_KIND_CONVERGENT -- 43 48 | | ATTR_KIND_SAFESTACK -- 44 49 | | ATTR_KIND_ARGMEMONLY -- 45 50 | | ATTR_KIND_SWIFT_SELF -- 46 51 | | ATTR_KIND_SWIFT_ERROR -- 47 52 | | ATTR_KIND_NO_RECURSE -- 48 53 | | ATTR_KIND_INACCESSIBLEMEM_ONLY -- 49 54 | | ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY -- 50 55 | deriving (Show, Enum) 56 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/ComdatSelectionKind.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.ComdatSelectionKind where 2 | 3 | data ComdatSelectionKind 4 | = COMDAT_SELECTION_KIND_UNUSED0 5 | | COMDAT_SELECTION_KIND_ANY -- 1 6 | | COMDAT_SELECTION_KIND_EXACT_MATCH -- 2 7 | | COMDAT_SELECTION_KIND_LARGEST -- 3 8 | | COMDAT_SELECTION_KIND_NO_DUPLICATES -- 4 9 | | COMDAT_SELECTION_KIND_SAME_SIZE -- 5 10 | deriving (Show, Enum) 11 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/Constants.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.Constants where 2 | 3 | -- | The constants block (CONSTANTS_BLOCK_ID) describes emission for each 4 | -- constant and maintains an implicit current type value. 5 | data Constant 6 | = CST_CODE_UNUSED0 -- 0 7 | -- | SETTYPE: [typeid] 8 | | CST_CODE_SETTYPE -- 1 9 | -- | NULL 10 | | CST_CODE_NULL -- 2 11 | -- | UNDEF 12 | | CST_CODE_UNDEF -- 3 13 | -- | INTEGER: [intval] 14 | | CST_CODE_INTEGER -- 4 15 | -- | WIDE_INTEGER: [n x intval] 16 | | CST_CODE_WIDE_INTEGER -- 5 17 | -- | FLOAT: [fpval] 18 | | CST_CODE_FLOAT -- 6 19 | -- | AGGREGATE: [n x value number] 20 | | CST_CODE_AGGREGATE -- 7 21 | -- | STRING: [values] 22 | | CST_CODE_STRING -- 8 23 | -- | CSTRING: [values] 24 | | CST_CODE_CSTRING -- 9 25 | -- | CE_BINOP: [opcode, opval, opval] 26 | | CST_CODE_CE_BINOP -- 10 27 | -- | CE_CAST: [opcode, opty, opval] 28 | | CST_CODE_CE_CAST -- 11 29 | -- | CE_GEP: [n x operands] 30 | | CST_CODE_CE_GEP -- 12 31 | -- | CE_SELECT: [opval, opval, opval] 32 | | CST_CODE_CE_SELECT -- 13 33 | -- | CE_EXTRACTELT: [opty, opval, opval] 34 | | CST_CODE_CE_EXTRACTELT -- 14 35 | -- | CE_INSERTELT: [opval, opval, opval] 36 | | CST_CODE_CE_INSERTELT -- 15 37 | -- | CE_SHUFFLEVEC: [opval, opval, opval] 38 | | CST_CODE_CE_SHUFFLEVEC -- 16 39 | -- | CE_CMP: [opty, opval, opval, pred] 40 | | CST_CODE_CE_CMP -- 17 41 | -- | INLINEASM: [sideeffect|alignstack,asmstr,conststr] 42 | | CST_CODE_INLINEASM_OLD -- 18 43 | -- | SHUFVEC_EX: [opty, opval, opval, opval] 44 | | CST_CODE_CE_SHUFVEC_EX -- 19 45 | -- | INBOUNDS_GEP: [n x operands] 46 | | CST_CODE_CE_INBOUNDS_GEP -- 20 47 | -- | CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] 48 | | CST_CODE_BLOCKADDRESS -- 21 49 | -- | DATA: [n x elements] 50 | | CST_CODE_DATA -- 22 51 | -- | INLINEASM: [sideeffect|alignstack|asmdialect,asmstr,conststr] 52 | | CST_CODE_INLINEASM -- 23 53 | deriving (Show, Enum) 54 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/Function.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.Function where 2 | 3 | -- | The function body block (FUNCTION_BLOCK_ID) describes function bodies. It 4 | -- can contain a constant block (CONSTANTS_BLOCK_ID). 5 | data Instruction 6 | = INST_UNUSED0 7 | -- | DECLAREBLOCKS: [n] 8 | | DECLAREBLOCKS -- 1 9 | -- | BINOP: [opcode, ty, opval, opval] 10 | | INST_BINOP -- 2 11 | -- | CAST: [opcode, ty, opty, opval] 12 | | INST_CAST -- 3 13 | -- | GEP: [n x operands] 14 | | INST_GEP_OLD -- 4 15 | -- | SELECT: [ty, opval, opval, opval] 16 | | INST_SELECT -- 5 17 | -- | EXTRACTELT: [opty, opval, opval] 18 | | INST_EXTRACTELT -- 6 19 | -- | INSERTELT: [ty, opval, opval, opval] 20 | | INST_INSERTELT -- 7 21 | -- | SHUFFLEVEC: [ty, opval, opval, opval] 22 | | INST_SHUFFLEVEC -- 8 23 | -- | CMP: [opty, opval, opval, pred] 24 | | INST_CMP -- 9 25 | -- | RET: [opty,opval] 26 | | INST_RET -- 10 27 | -- | BR: [bb#, bb#, cond] or [bb#] 28 | | INST_BR -- 11 29 | -- | SWITCH: [opty, op0, op1, ...] 30 | | INST_SWITCH -- 12 31 | -- | INVOKE: [attr, fnty, op0,op1, ...] 32 | | INST_INVOKE -- 13 33 | | INST_UNUSED14 34 | -- | UNREACHABLE 35 | | INST_UNREACHABLE -- 15 36 | -- | PHI: [ty, val0,bb0, ...] 37 | | INST_PHI -- 16 38 | | INST_UNUSED17 39 | | INST_UNUSED18 40 | -- | ALLOCA: [instty, opty, op, align] 41 | | INST_ALLOCA -- 19 42 | -- | LOAD: [opty, op, align, vol] 43 | | INST_LOAD -- 20 44 | | INST_UNUSED21 45 | | INST_UNUSED22 46 | -- | VAARG: [valistty, valist, instty] 47 | | INST_VAARG -- 23 48 | -- | This store code encodes the pointer type, rather than the value type 49 | -- this is so information only available in the pointer type (e.g. address 50 | -- spaces) is retained. 51 | -- | STORE: [ptrty,ptr,val, align, vol] 52 | | INST_STORE_OLD -- 24 53 | | INST_UNUSED25 54 | -- | EXTRACTVAL: [n x operands] 55 | | INST_EXTRACTVAL -- 26 56 | -- | INSERTVAL: [n x operands] 57 | | INST_INSERTVAL -- 27 58 | -- | fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to 59 | -- support legacy vicmp/vfcmp instructions. 60 | -- | CMP2: [opty, opval, opval, pred] 61 | | INST_CMP2 -- 28 62 | -- | new select on i1 or [N x i1] 63 | -- | VSELECT: [ty,opval,opval,predty,pred] 64 | | INST_VSELECT -- 29 65 | -- | INBOUNDS_GEP: [n x operands] 66 | | INST_INBOUNDS_GEP_OLD -- 30 67 | -- | INDIRECTBR: [opty, op0, op1, ...] 68 | | INST_INDIRECTBR -- 31 69 | | INST_UNUSED32 70 | -- | DEBUG_LOC_AGAIN 71 | | DEBUG_LOC_AGAIN -- 33 72 | -- | CALL: [paramattrs, cc[, fmf][, fnty], fnid, arg0, arg1...] 73 | -- fast math flag is set if the CallMarker FMF is present in the cc. 74 | -- fnty is set if the CallMaker ExplicitType is set. 75 | | INST_CALL -- 34 76 | -- | DEBUG_LOC: [Line,Col,ScopeVal, IAVal] 77 | | DEBUG_LOC -- 35 78 | -- | FENCE: [ordering, synchscope] 79 | | INST_FENCE -- 36 80 | -- | CMPXCHG: [ptrty,ptr,cmp,new, align, vol, ordering, synchscope] 81 | | INST_CMPXCHG_OLD -- 37 82 | -- | ATOMICRMW: [ptrty,ptr,val, operation, align, vol,ordering, synchscope] 83 | | INST_ATOMICRMW -- 38 84 | -- | RESUME: [opval] 85 | | INST_RESUME -- 39 86 | -- | LANDINGPAD: [ty,val,val,num,id0,val0...] 87 | | INST_LANDINGPAD_OLD -- 40 88 | -- | LOAD: [opty, op, align, vol,ordering, synchscope] 89 | | INST_LOADATOMIC -- 41 90 | -- | STORE: [ptrty,ptr,val, align, vol, ordering, synchscope] 91 | | INST_STOREATOMIC_OLD -- 42 92 | -- | GEP: [inbounds, n x operands] 93 | | INST_GEP -- 43 94 | -- | STORE: [ptrty,ptr,valty,val, align, vol] 95 | | INST_STORE -- 44 96 | -- | STORE: [ptrty,ptr,val, align, vol 97 | | INST_STOREATOMIC -- 45 98 | -- | CMPXCHG: [ptrty,ptr,valty,cmp,new, align,vol,ordering,synchscope] 99 | | INST_CMPXCHG -- 46 100 | -- | LANDINGPAD: [ty,val,num,id0,val0...] 101 | | INST_LANDINGPAD -- 47 102 | -- | CLEANUPRET: [val] or [val,bb#] 103 | | INST_CLEANUPRET -- 48 104 | -- | CATCHRET: [val,bb#] 105 | | INST_CATCHRET -- 49 106 | -- | CATCHPAD: [bb#,bb#,num,args...] 107 | | INST_CATCHPAD -- 50 108 | -- | CLEANUPPAD: [num,args...] 109 | | INST_CLEANUPPAD -- 51 110 | -- | CATCHSWITCH: [num,args...] or [num,args...,bb] 111 | | INST_CATCHSWITCH -- 52 112 | | INST_UNUSED53 113 | | INST_UNUSED54 114 | -- | OPERAND_BUNDLE: [tag#, value...] 115 | | OPERAND_BUNDLE -- 55 116 | deriving (Show, Enum) 117 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/FunctionSummarySymtab.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.FunctionSummarySymtab where 2 | 3 | -- | The function summary section uses different codes in the per-module 4 | -- and combined index cases. 5 | data FunctionSummarySymtab 6 | = FS_CODE_UNUSED0 -- 0 7 | -- | FS_ENTRY: @[valueid, islocal, instcount]@ 8 | | FS_CODE_PERMODULE_ENTRY -- 1 9 | -- | FS_ENTRY: @[modid, instcount]@ 10 | | FS_CODE_COMBINED_ENTRY -- 2 11 | deriving (Show, Enum) 12 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/Identification.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Codes.Identification where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- | Identification block contains a string that describes the producer details, 8 | -- and an epoch that defines the auto-upgrade capability. 9 | data IdentificationCodes 10 | -- There is no 0. 11 | = UNUSED0 -- 0 12 | -- | IDENTIFICATION: @[strchr x N]@ 13 | | STRING -- 1 14 | -- | EPOCH: @[epoch#]@ -- see @Epoch@ 15 | | EPOCH -- 2 16 | deriving (Show, Enum, Generic) 17 | 18 | instance Binary IdentificationCodes 19 | 20 | -- | The epoch that defines the auto-upgrade compatibility for the bitcode. 21 | -- 22 | -- LLVM guarantees in a major release that a minor release can read bitcode 23 | -- generated by previous minor releases. We translate this by making the reader 24 | -- accepting only bitcode with the same epoch, except for the X.0 release which 25 | -- also accepts N-1. 26 | data Epoch 27 | -- | The current bitcode epoch 28 | = Current -- 0 29 | deriving (Show, Enum, Generic) 30 | 31 | instance Binary Epoch 32 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/Metadata.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.Metadata where 2 | 3 | data Metadata 4 | = METADATA_UNUSED0 -- 0 5 | -- | MDSTRING: [values] 6 | | METADATA_STRING -- 1 7 | -- | VALUE: [type num, value num] 8 | | METADATA_VALUE -- 2 9 | -- | NODE: [n x md num] 10 | | METADATA_NODE -- 3 11 | -- | STRING: [values] 12 | | METADATA_NAME -- 4 13 | -- | DISTINCT_NODE: [n x md num] 14 | | METADATA_DISTINCT_NODE -- 5 15 | -- | [n x [id, name]] 16 | | METADATA_KIND -- 6 17 | -- | [distinct, line, col, scope, inlined-at?] 18 | | METADATA_LOCATION -- 7 19 | -- | OLD_NODE: [n x (type num, value num)] 20 | | METADATA_OLD_NODE -- 8 21 | -- | OLD_FN_NODE: [n x (type num, value num)] 22 | | METADATA_OLD_FN_NODE -- 9 23 | -- | NAMED_NODE: [n x mdnodes] 24 | | METADATA_NAMED_NODE -- 10 25 | -- | [m x [value, [n x [id, mdnode]]] 26 | | METADATA_ATTACHMENT -- 11 27 | -- | [distinct, tag, vers, header, n x md num] 28 | | METADATA_GENERIC_DEBUG -- 12 29 | -- | [distinct, count, lo] 30 | | METADATA_SUBRANGE -- 13 31 | -- | [distinct, value, name] 32 | | METADATA_ENUMERATOR -- 14 33 | -- | [distinct, tag, name, size, align, enc] 34 | | METADATA_BASIC_TYPE -- 15 35 | -- | [distinct, filename, directory] 36 | | METADATA_FILE -- 16 37 | -- | [distinct, ...] 38 | | METADATA_DERIVED_TYPE -- 17 39 | -- | [distinct, ...] 40 | | METADATA_COMPOSITE_TYPE -- 18 41 | -- | [distinct, flags, types] 42 | | METADATA_SUBROUTINE_TYPE -- 19 43 | -- | [distinct, ...] 44 | | METADATA_COMPILE_UNIT -- 20 45 | -- | [distinct, ...] 46 | | METADATA_SUBPROGRAM -- 21 47 | -- | [distinct, scope, file, line, column] 48 | | METADATA_LEXICAL_BLOCK -- 22 49 | -- | [distinct, scope, file, discriminator] 50 | | METADATA_LEXICAL_BLOCK_FILE -- 23 51 | -- | [distinct, scope, file, name, line] 52 | | METADATA_NAMESPACE -- 24 53 | -- | [distinct, scope, name, type, ...] 54 | | METADATA_TEMPLATE_TYPE -- 25 55 | -- | [distinct, scope, name, type, value, ...] 56 | | METADATA_TEMPLATE_VALUE -- 26 57 | -- | [distinct, ...] 58 | | METADATA_GLOBAL_VAR -- 27 59 | -- | [distinct, ...] 60 | | METADATA_LOCAL_VAR -- 28 61 | -- | [distinct, n x element] 62 | | METADATA_EXPRESSION -- 29 63 | -- | [distinct, name, file, line, ...] 64 | | METADATA_OBJC_PROPERTY -- 30 65 | -- | [distinct, tag, scope, entity, line, name] 66 | | METADATA_IMPORTED_ENTITY -- 31 67 | -- | [distinct, scope, name, ...] 68 | | METADATA_MODULE -- 32 69 | -- | [distinct, macinfo, line, name, value] 70 | | METADATA_MACRO -- 33 71 | -- | [distinct, macinfo, line, file, ...] 72 | | METADATA_MACRO_FILE -- 34 73 | deriving (Show, Enum) 74 | 75 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/Module.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.Module where 2 | 3 | -- | MODULE blocks have a number of optional fields and subblocks. 4 | data ModuleCode 5 | = SKIP -- 0 6 | -- | VERSION: [version#] 7 | | VERSION -- 1 8 | -- | TRIPLE: [strchr x N] 9 | | TRIPLE -- 2 10 | -- | DATALAYOUT: [strchr x N] 11 | | DATALAYOUT -- 3 12 | -- | ASM: [strchr x N] 13 | | ASM -- 4 14 | -- | SECTIONNAME: [strchr x N] 15 | | SECTIONNAME -- 5 16 | -- | DEPLIB: [strchr x N]. WARN: Will be removed in 4.0. 17 | | DEPLIB -- 6 18 | -- | GLOBALVAR: [pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat] 19 | | GLOBALVAR -- 7 20 | -- | FUNCTION: v1: [type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, prefixdata, personality] 21 | -- v2: [strtab_offset, strtab_size] ++ v1 22 | | FUNCTION -- 8, 23 | -- | ALIAS: [alias type, aliasee val#, linkage, visibility] 24 | | ALIAS_OLD -- 9 25 | -- | PURGEVALS: [numvals] 26 | | PURGEVALS -- 10 27 | -- | GCNAME: [strchr x N] 28 | | GCNAME -- 11 29 | -- | COMDAT: v1: [selection_kind, name] 30 | -- v2: [strtab_offset, strtab_size, selection_kind] 31 | | COMDAT -- 12 32 | -- | VSTOFFSET: [offset] 33 | | VSTOFFSET -- 13 34 | -- | ALIAS: [alias value type, addrspace, aliasee val#, linkage, visibility] 35 | | ALIAS -- 14 36 | -- | METADATA_VALUES_UNUSED 37 | | METADATA_VALUES_UNUSED -- 15 38 | -- | SOURCE_FILENAME: [namechar x N] 39 | | SOURCE_FILENAME -- 16 40 | -- | HASH: [5*i32] 41 | | HASH -- 17 42 | -- | IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility] 43 | | IFUNC -- 18 44 | deriving (Show, Enum) 45 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/ModulePathSymtab.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.ModulePathSymtab where 2 | 3 | -- | The module path symbol table only has one code (MST_CODE_ENTRY). 4 | data ModulePathSymtab 5 | = MST_CODE_UNUSED0 -- 0 6 | -- | MST_ENTRY: @[modid, namechar x N]@ 7 | | MST_CODE_ENTRY -- 1 8 | deriving (Show, Enum) 9 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/OperandBundleTag.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.OperandBundleTag where 2 | 3 | data OperandBundleTag 4 | = OPERAND_BUNDLE_UNUSED0 -- 0 5 | -- | TAG: @[strchr x N]@ 6 | | OPERAND_BUNDLE_TAG -- 1 7 | deriving (Show, Enum) 8 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/SynchronizationScope.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Codes.SynchronizationScope where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- | Encoded SynchronizationScope values. 8 | -- LLVM at some point gaind the ability to 9 | -- encode custom sync scope ids. Those are 10 | -- aggreagted in the sync scope ids, and 11 | -- start at offset 2. 12 | data AtomicSynchScope 13 | = SINGLE_THREAD -- 0 14 | | CROSS_THREAD -- 1 -- also known as SyncScope::System 15 | deriving (Show, Enum, Eq, Ord, Generic) 16 | 17 | instance Binary AtomicSynchScope 18 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/Type.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.Type where 2 | 3 | -- | = TYPE blocks have codes for each type primitive they use. 4 | data Type 5 | = UNUSED0 -- 0 6 | -- | NUMENTRY: @[numentries]@ 7 | | NUMENTRY -- 1 8 | -- | == Type Codes 9 | -- VOID 10 | | VOID -- 2 11 | -- | FLOAT 12 | | FLOAT -- 3 13 | -- | DOUBLE 14 | | DOUBLE -- 4 15 | -- | LABEL 16 | | LABEL -- 5 17 | -- | OPAQUE 18 | | OPAQUE -- 6 19 | -- | INTEGER: @[width]@ 20 | | INTEGER -- 7 21 | -- | POINTER: @[pointee type, address space]@ -- address space is optional, defaults to 0. 22 | | POINTER -- 8 23 | -- | FUNCTION: @[vararg, attrid, retty, paramty x N]@ 24 | | FUNCTION_OLD -- 9 25 | -- | HALF 26 | | HALF -- 10 27 | -- | ARRAY: [numelts, eltty] 28 | | ARRAY -- 11 29 | -- | VECTOR: [numelts, eltty] 30 | | VECTOR -- 12 31 | -- | These are not with the other floating point types because they're 32 | -- a late addition, and putting them in the right place breaks 33 | -- binary compatibility. 34 | -- 35 | -- | X86 LONG DOUBLE 36 | | X86_FP80 -- 13 37 | -- | LONG DOUBLE (112 bit mantissa) 38 | | FP128 -- 14 39 | -- | PPC LONG DOUBLE (2 doubles) 40 | | PPC_FP128 -- 15 41 | -- | METADATA 42 | | METADATA -- 16 43 | -- | X86 MMX 44 | | X86_MMX -- 17 45 | -- | STRUCT_ANON: @[ispacked, eltty x N]@ 46 | | STRUCT_ANON -- 18 47 | -- | STRUCT_NAME: @[strchr x N]@ 48 | | STRUCT_NAME -- 19 49 | -- | STRUCT_NAMED: @[ispacked, eltty x N]@ 50 | | STRUCT_NAMED -- 20 51 | -- | FUNCTION: @[vararg, retty, paramty x N]@ 52 | | FUNCTION -- 21 53 | -- | TOKEN 54 | | TOKEN -- 22 55 | deriving (Show, Enum) 56 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/TypeSymtab.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.TypeSymtab where 2 | 3 | -- | The type symbol table only has one code (TST_ENTRY_CODE). 4 | data TypeSymtab 5 | = TST_CODE_UNUSED0 -- 0 6 | -- | TST_ENTRY: @[typeid, namechar x N]@ 7 | | TST_CODE_ENTRY -- 1 8 | deriving (Show, Enum) 9 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/UseList.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.UseList where 2 | 3 | data UseList 4 | = USELIST_UNUSED0 5 | -- | DEFAULT: [index..., value-id] 6 | | USELIST_CODE_DEFAULT -- 1 7 | -- | BB: [index..., bb-id] 8 | | USELIST_CODE_BB -- 2 9 | deriving (Show, Enum) 10 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Codes/ValueSymtab.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Codes.ValueSymtab where 2 | 3 | -- | Value symbol table codes. 4 | data ValueSymtabCodes 5 | = VST_CODE_UNUSED0 -- 0 6 | -- | VST_ENTRY: @[valueid, namechar x N]@ 7 | | VST_CODE_ENTRY -- 1 8 | -- | VST_BBENTRY: @[bbid, namechar x N]@ 9 | | VST_CODE_BBENTRY -- 2 10 | -- | VST_FNENTRY: @[valueid, offset, namechar x N]@ 11 | | VST_CODE_FNENTRY -- 3 12 | -- | VST_COMBINED_FNENTRY: @[offset, namechar x N]@ 13 | | VST_CODE_COMBINED_FNENTRY -- 4 14 | deriving (Show, Enum) 15 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Flags.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Flags where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- | OverflowingBinaryOperatorOptionalFlags - Flags for serializing 8 | -- OverflowingBinaryOperator's SubclassOptionalData contents. 9 | data OverflowingBinaryOperatorOptional 10 | = NO_UNSIGNED_WRAP -- 0 11 | | NO_SIGNED_WRAP -- 1 12 | deriving (Show, Enum, Eq, Generic) 13 | 14 | instance Binary OverflowingBinaryOperatorOptional 15 | 16 | -- | PossiblyExactOperatorOptionalFlags - Flags for serializing 17 | -- PossiblyExactOperator's SubclassOptionalData contents. 18 | data PossiblyExactOperatorOptional 19 | = EXACT -- 0 20 | deriving (Show, Enum, Eq, Generic) 21 | 22 | instance Binary PossiblyExactOperatorOptional 23 | 24 | -- | Markers and flags for call instruction. 25 | data CallMarkers 26 | = CALL_TAIL -- 0 27 | | CALL_CCONV -- 1 28 | | CALL_UNUSED2 | CALL_UNUSED3 | CALL_UNUSED4 | CALL_UNUSED5 | CALL_UNUSED6 29 | | CALL_UNUSED7 | CALL_UNUSED8 | CALL_UNUSED9 | CALL_UNUSED10 | CALL_UNUSED11 30 | | CALL_UNUSED12 | CALL_UNUSED13 31 | | CALL_MUSTTAIL -- 14 32 | | CALL_EXPLICIT_TYPE -- 15 33 | | CALL_NOTAIL -- 16 34 | -- | Call has optional fast-math-flags. 35 | | CALL_FMF -- 17 36 | deriving (Enum, Show, Eq, Generic) 37 | 38 | instance Binary CallMarkers 39 | 40 | data Flag = Overflow OverflowingBinaryOperatorOptional 41 | | Exact PossiblyExactOperatorOptional 42 | | Call CallMarkers 43 | deriving (Show, Eq, Generic) 44 | 45 | instance Binary Flag 46 | 47 | flagValue :: Flag -> Int 48 | flagValue (Overflow x) = fromEnum x 49 | flagValue (Exact x) = fromEnum x 50 | flagValue (Call x) = fromEnum x 51 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/FromBitCode.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RecordWildCards #-} 2 | {-# LANGUAGE ViewPatterns #-} 3 | {-# LANGUAGE TupleSections #-} 4 | {-# LANGUAGE LambdaCase #-} 5 | module Data.BitCode.LLVM.FromBitCode where 6 | 7 | import Data.Bits (testBit, shift, (.|.), (.&.), complement, FiniteBits) 8 | import Data.Word (Word64) 9 | 10 | import Control.Monad (when, unless, foldM, foldM_, zipWithM) 11 | 12 | import Data.BitCode (NBitCode(..), normalize, records, blocks, lookupBlock, lookupRecord) 13 | import qualified Data.BitCode as BC 14 | import Data.BitCode.LLVM 15 | import Data.BitCode.LLVM.Util 16 | import Data.BitCode.LLVM.Classes.HasType as T 17 | import Data.BitCode.LLVM.Reader.Monad 18 | import Data.BitCode.LLVM.ParamAttr 19 | import Data.BitCode.LLVM.IDs.Blocks as B 20 | import qualified Data.BitCode.LLVM.Codes.Identification as IC 21 | import Data.BitCode.LLVM.Codes.AttributeKind 22 | import Data.BitCode.LLVM.Codes.Attribute 23 | import Data.BitCode.LLVM.Codes.ValueSymtab 24 | import Data.BitCode.LLVM.Codes.Constants 25 | import Data.BitCode.LLVM.Codes.AtomicOrdering 26 | import Data.BitCode.LLVM.Codes.SynchronizationScope 27 | import Data.BitCode.LLVM.Codes.Metadata as MD 28 | import Data.BitCode.LLVM.Codes.Function as FC 29 | 30 | import Data.BitCode.LLVM.Types 31 | 32 | import Data.BitCode.LLVM.Value as V hiding (trace, traceM) 33 | import Data.BitCode.LLVM.Type as T 34 | import Data.BitCode.LLVM.Instruction as I 35 | import Data.BitCode.LLVM.Function as F 36 | import Data.BitCode.LLVM.Metadata 37 | import Data.BitCode.LLVM.Codes.Type as TC 38 | import Data.BitCode.LLVM.Codes.Module as M 39 | import Data.Maybe (catMaybes, fromMaybe) 40 | 41 | import qualified Data.Set as Set 42 | 43 | import qualified Data.BitCode.LLVM.Linkage as Linkage 44 | import qualified Data.BitCode.LLVM.Visibility as Visibility 45 | import qualified Data.BitCode.LLVM.ThreadLocalMode as ThreadLocalMode 46 | import qualified Data.BitCode.LLVM.StorageClass as DLLStorageClass 47 | 48 | 49 | import qualified Data.BitCode.LLVM.Flags as Flags 50 | 51 | import Data.BitCode.LLVM.Opcodes.Binary as BinOp 52 | import qualified Data.BitCode.LLVM.Opcodes.Cast as CastOp 53 | 54 | import GHC.Stack (HasCallStack) 55 | 56 | 57 | -- Conceptuall we take bitcode and interpret it as LLVM IR. 58 | -- This should result in a single module. 59 | 60 | parseAttr :: [NBitCode] -> LLVMReader () 61 | parseAttr = mapM_ parseAttr . records 62 | where parseAttr :: (AttributeCode, [BC.Val]) -> LLVMReader () 63 | parseAttr (PARAMATTR_CODE_ENTRY, gidxs) = tellParamattr $ map fromIntegral gidxs 64 | parseAttr (c,_) = fail $ "PARAMATTR: code: " ++ show c ++ "not (yet) supported" 65 | 66 | -- | Documentation on this is pretty spotty. 67 | -- 68 | -- Best so far is http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp?view=diff&r1=174848&r2=174849&pathrev=174849 69 | -- 70 | -- ops: [goupId, idx, ...] where 71 | -- idx = 0 -> return value attributes. 72 | -- idx = 2^32-1 -> function attributes 73 | -- idx = n -> n'th argument attribute. 74 | -- flag: 0,n -> AttributeKind(n) 75 | -- 1,KIND_ALIGNMENT,n -> AlignmentAttr += n 76 | -- 1,_,n -> StackAlignmentAttr += n 77 | -- 4,...,0,...0 -> String: Kind, Value 78 | -- _,...,0 -> String: Kind (empty value) 79 | parseAttrGroup :: [NBitCode] -> LLVMReader () 80 | parseAttrGroup = mapM_ parseAttrGroup . records 81 | where parseAttrGroup :: (AttributeCode, [BC.Val]) -> LLVMReader () 82 | parseAttrGroup (PARAMATTR_GRP_CODE_ENTRY, (n:vals)) = tellParamattrGroup (fromIntegral n, parseAttrGroupEntry vals) 83 | parseAttrGroup (c,_) = fail $ "PARAMATTR_GROUP: code: " ++ show c ++ "not (yet) supported" 84 | 85 | parseAttrGroupEntry :: [BC.Val] -> ParamAttrGroupEntry 86 | parseAttrGroupEntry (t:vs) = GroupEntry (grpIdx t) (go vs) 87 | where grpIdx :: BC.Val -> ParamAttrGroupIdx 88 | grpIdx 0 = Data.BitCode.LLVM.ParamAttr.Ret 89 | grpIdx 0xffffffff = Fun 90 | grpIdx n = Param n 91 | go :: [BC.Val] -> [ParamAttrEntry] 92 | go [] = [] 93 | go (0:n:vs) = Kind (toEnum . fromIntegral $ n):go vs 94 | go (1:a:n:vs) | a == fromIntegral (fromEnum ATTR_KIND_ALIGNMENT) = Align n:go vs 95 | | otherwise = StackAlign n:go vs 96 | go (4:vs) = let key = map (toEnum . fromIntegral) $ takeWhile (/= 0) vs 97 | val = map (toEnum . fromIntegral) $ takeWhile (/= 0) (drop (length key + 1) vs) 98 | in Pair key (Just val):go (drop (length key + length val + 2) vs) 99 | go (_:vs) = let key = map (toEnum . fromIntegral) $ takeWhile (/= 0) vs 100 | in Pair key Nothing:go (drop (length key + 1) vs) 101 | 102 | 103 | 104 | -- 12 - Function 105 | -- see below 106 | 107 | -- 13 - Identification 108 | parseIdent :: [NBitCode] -> LLVMReader Ident 109 | parseIdent body = let Just s = lookupRecord IC.STRING body 110 | Just [e] = lookupRecord IC.EPOCH body 111 | in return $ Ident (map toEnum' s) (toEnum' e) 112 | 113 | -- parseBlock (BC.Block code _ body) | code == fromEnum VALUE_SYMTAB -- 14 114 | -- = let parseValue :: BC.Record -> (Int,ValueSymbolEntry) 115 | -- parseValue (code, vals) = case toEnum code of 116 | -- VST_CODE_ENTRY -> let (idx:vs) = vals in (fromIntegral idx, Entry $ map (toEnum . fromIntegral) vs) 117 | -- VST_CODE_FNENTRY -> let (idx:offset:vs) = vals in (fromIntegral idx, FnEntry (fromIntegral offset) $ map (toEnum . fromIntegral) vs) 118 | -- in pure $ ValueSymTab . map parseValue . catMaybes . map toRecord $ body 119 | 120 | -- 15 - Metadata 121 | -- 16 - MetadataAttachment 122 | -- Plan of attack 123 | -- Normalize Records (Abbrev, and Unabbrev into Record -- how do we handle the *richer* types?[1]) 124 | -- We somehow need to drop `len` ops from it though. Probably best to have some special Field type 125 | -- so we can filter non-control ops out. 126 | 127 | 128 | -- [1] could turn Char into Word64 through Ord; would then have to reinterpret them accordingly. 129 | 130 | 131 | -- 17 - Type (new) 132 | -- Needs TypeTableReader and TypeTableWriter 133 | -- NOTE: We do some extra dance because Opaque and Named Structures are emitted as 134 | -- a record that holds the name (STRUCT_NAME) and then a record that consumes that 135 | -- name (OPAQUE and STRUCT_NAMED). 136 | -- TODO: Add assert that length <$> askTypeList is equal to the value obtained from the numentry record. 137 | parseTypes :: [NBitCode] -> LLVMReader () 138 | parseTypes = foldM_ parseType Nothing . records 139 | where parseType :: Maybe String -> (Type,[BC.Val]) -> LLVMReader (Maybe String) 140 | parseType name = \case 141 | -- ignore number of entries record. 142 | (NUMENTRY, [n] ) -> pure name 143 | (NUMENTRY, [] ) -> error $ "Invalid record: empty NUMENTRY" 144 | (VOID, [] ) -> tellType Void >> pure name 145 | (FLOAT, [] ) -> tellType T.Float >> pure name 146 | (DOUBLE, [] ) -> tellType Double >> pure name 147 | (LABEL, [] ) -> tellType T.Label >> pure name 148 | (OPAQUE, [] ) 149 | | (Just n) <- name -> tellType (Opaque n) >> pure Nothing 150 | | otherwise -> fail "Opaque needs a name!" 151 | -- TODO: why does OPAQUE check Record.size() != 1, and if so result in Invalid record? 152 | -- (OPAQUE, [_] ) -> error $ "Invalid record: OPAQUE must not" 153 | (INTEGER, [width] ) -> (tellType $ T.Int width) >> pure name 154 | (INTEGER, [] ) -> error $ "Invalid record: empty INTEGER" 155 | -- TODO: Check that the resulting type is valid (!Void, !Label, !Metadata, !Token) 156 | (POINTER, [tyId, width] ) -> askType tyId >>= tellType . Ptr width >> pure name 157 | (POINTER, [tyId] ) -> askType tyId >>= tellType . Ptr 0 >> pure name 158 | (POINTER, [] ) -> error $ "Invalid record: empty POINTER" 159 | (HALF, [] ) -> tellType Half >> pure name 160 | -- TODO: Valid element types (!Void, !Label, !Metadata, !Function, !Token) 161 | (ARRAY, [numElts, eltTyId] ) -> askType eltTyId >>= tellType . T.Array numElts >> pure name 162 | (ARRAY, x ) | length x < 2 -> error $ "Invalid record: ARRAY" 163 | -- TODO: Valid element type (Integer, FloatingPoint, Pointer) 164 | (VECTOR, [numElts, eltTyId] ) -> askType eltTyId >>= tellType . T.Vector numElts >> pure name 165 | (VECTOR, x ) | length x < 2 -> error $ "Invalid record: VECTOR" 166 | (X86_FP80, [] ) -> tellType X86Fp80 >> pure name 167 | (FP128, [] ) -> tellType Fp128 >> pure name 168 | (TC.METADATA, [] ) -> tellType T.Metadata >> pure name 169 | (X86_MMX, [] ) -> tellType X86Mmx >> pure name 170 | -- TODO: Valid element type: (!Void, !Label, !Metadata, !Function, !Token) 171 | -- TODO: Should also check that elemTyIds is the smae number after asking for the type (e.g. that askting for the type does not fail.) 172 | (STRUCT_ANON, (isPacked:eltTyIds) ) -> mapM askType eltTyIds >>= tellType . StructAnon (isPacked /= 0) >> pure name 173 | (STRUCT_ANON, [] ) -> error $ "Invalid record: Anon struct must have at least one op." 174 | (STRUCT_NAME, ops ) -> pure (pure (toString ops)) 175 | (STRUCT_NAMED,(isPacked:eltTyIds) ) 176 | | (Just n) <- name -> mapM askType eltTyIds >>= tellType . StructNamed n (isPacked /= 0) >> pure Nothing 177 | | otherwise -> fail "Named Struct needs a name!" 178 | (STRUCT_NAMED, [] ) -> error $ "Invalid record: Named struct must have at least one op." 179 | -- TODO: If the number of argument type does not match paramTys, this is an invalid record. 180 | -- Must filter for isValidArgumentType (=isFirstClassType), (!Void, !Function) 181 | (TC.FUNCTION, (vararg:retTy:paramTys)) -> T.Function (vararg /= 0) <$> askType retTy <*> mapM askType paramTys >>= tellType >> pure name 182 | (TC.FUNCTION, [_] ) -> error $ "Invalid record: FUNCTION must have at least two ops." 183 | (TOKEN, [] ) -> tellType Token >> pure name 184 | (code, ops ) -> fail $ "Can not handle type: " ++ show code ++ " with ops: " ++ show ops 185 | toString :: (Integral a) => [a] -> String 186 | toString = map (toEnum . fromIntegral) 187 | 188 | -- 11 - Constants 189 | -- Parse Constnats and add them to the valueList 190 | 191 | -- | toSigned helper. Bitcode does doesn't encode signed 192 | -- values actually. But if a signed value needs to be 193 | -- encoded (an this can only be application specific) 194 | -- it is shifted by one and the low bit is set if 195 | -- negative. This function reverses that encoding. 196 | toSigned :: (FiniteBits a) => a -> a 197 | toSigned v | testBit v 0 = complement (shift v (-1)) 198 | | otherwise = shift v (-1) 199 | 200 | -- | Parse constants. 201 | parseConstants :: HasCallStack => [NBitCode] -> LLVMReader () 202 | parseConstants = foldM_ parseConstant undefined . records 203 | where parseConstant :: Ty -> (Constant,[BC.Val]) -> LLVMReader Ty 204 | parseConstant ty = \case 205 | -- Also invalid record, if tyId outside of Typelist. 206 | (CST_CODE_SETTYPE, [tyId]) -> askType tyId 207 | (CST_CODE_SETTYPE, [] ) -> error $ "Invalid record: empty constants SETTYPE." 208 | (CST_CODE_NULL, [] ) -> add $ mkConst V.Null 209 | (CST_CODE_UNDEF, [] ) -> add $ mkConst V.Undef 210 | (CST_CODE_INTEGER, [val] ) | T.Int{} <- ty -> add $ mkConst (V.Int (toSigned val)) 211 | | otherwise -> error "Invalid record: INTEGER constant, but type is not Int" 212 | (CST_CODE_INTEGER, [] ) -> error "Invalid record: INTEGER must have one op!" 213 | (CST_CODE_WIDE_INTEGER, vals) | T.Int{} <- ty -> add $ mkConst (V.WideInt (map toSigned vals)) 214 | | otherwise -> error "Invalid record: WIDE_INTEGER constant, but type is not Int" 215 | (CST_CODE_WIDE_INTEGER, [] ) -> error "Invalid record: WIDE_INTEGER must have one op!" 216 | -- TODO: how do we interpret a Word64 as a FPVal? 217 | -- CST_CODE_FLOAT -> let [val] = vals 218 | -- in (++(ty, C.Float val)) <$> go ty rs 219 | (CST_CODE_FLOAT, [] ) -> error "Invalid record: FLOAT must have one op!" 220 | (CST_CODE_AGGREGATE, []) -> error "Invalid record: AGGREGATE must have at least one op!" 221 | (CST_CODE_AGGREGATE, valIds) 222 | -- XXX: We *assume*, but do not verify that the types of the askValue's actually match those of the structure. 223 | | T.StructAnon _ ts <- ty -> add =<< mkConst <$> (V.Struct <$> zipWithM askValue ts valIds) 224 | | T.StructNamed _ _ ts <- ty -> add =<< mkConst <$> (V.Struct <$> zipWithM askValue ts valIds) 225 | | T.Array _ t <- ty -> add =<< mkConst <$> (V.Array <$> mapM (askValue t) valIds) 226 | | T.Vector _ t <- ty -> add =<< mkConst <$> (V.Vector <$> mapM (askValue t) valIds) 227 | | otherwise -> add $ mkConst V.Undef 228 | (CST_CODE_STRING, [] ) -> error "Invalid record: STRING must have at least one op!" 229 | (CST_CODE_STRING, vals ) -> add $ mkConst (V.String $ map toEnum' vals) 230 | (CST_CODE_CSTRING, [] ) -> error "Invalid record: CSTRING must have at least one op!" 231 | (CST_CODE_CSTRING, vals) -> add $ mkConst (V.CString $ map toEnum' vals) 232 | -- TODO: support Constant Binop with 4 operands. 233 | (CST_CODE_CE_BINOP, [code, lhs, rhs]) -> add =<< mkConst <$> (V.BinOp (toEnum' code) <$> askValue' lhs <*> askValue' rhs) 234 | (CST_CODE_CE_BINOP, _) -> error "Invalid record: BINOP only suppored with exactly three ops!" 235 | (CST_CODE_CE_CAST, [ code, tyId, valId ]) -> add =<< mkConst <$> (V.Cast <$> askType tyId <*> pure (toEnum' code) <*> askValue' valId) 236 | (CST_CODE_CE_BINOP, _) -> error "Invalid record: CAST only suppored with exactly three ops!" 237 | -- TODO: proper parsing. 238 | -- if even, assume pointee = nullptr 239 | -- otherwise use first record. 240 | -- the rest is [ty, val, ...], if type lookup fails -> Invalid record. 241 | -- See INBOUNDS_GEP 242 | (CST_CODE_CE_GEP, vals) -> add $ mkConst (V.Gep vals) 243 | -- TODO: CST_CODE_CE_SELECT 244 | -- CST_CODE_CE_EXTRACTELT 245 | -- CST_CODE_CE_INSERTELT 246 | -- CST_CODE_CE_SHUFFLEVEC 247 | -- CST_CODE_CE_CMP 248 | -- CST_CODE_CE_INLINEASM_OLD 249 | -- CST_CODE_CE_SHUFVEC_EX 250 | -- see GEP. 251 | (CST_CODE_CE_INBOUNDS_GEP, (v:vs)) 252 | -- either [t, [tyId, valId, ...]] 253 | | length vs `mod` 2 == 0 -> do 254 | t <- askType v 255 | add =<< mkConst . V.InboundsGep t <$> getTypedSymbols vs 256 | | otherwise -> do 257 | let t = Ptr 0 Void -- nullptr 258 | add =<< mkConst . V.InboundsGep t <$> getTypedSymbols vs 259 | -- TODO: CST_CODE_BLOCKADDRESS 260 | -- CST_CODE_DATA 261 | -- CST_CODE_INLINEASM 262 | (c,vs) -> error $ "Code: " ++ show c ++ " not supported; values: " ++ show vs ++ "; current type: " ++ show ty 263 | where mkConst :: Const -> Value 264 | mkConst = Constant ty 265 | add :: Value -> LLVMReader Ty 266 | add val = tellValue val >> pure ty 267 | -- WARNING - TODO: Converting Word64 to possible Int(32). 268 | toSigned :: Word64 -> Int 269 | toSigned w = fromIntegral $ case (testBit w 0, shift w (-1)) of 270 | (True, v) -> -v 271 | (False, v) -> v 272 | getTypedSymbols :: [Word64] -> LLVMReader [Symbol] 273 | getTypedSymbols [] = pure [] 274 | getTypedSymbols (tId:vId:vs) = do 275 | t <- askType tId 276 | v <- askValue t vId 277 | -- TODO: check that the type of v matches t. 278 | (v:) <$> getTypedSymbols vs 279 | 280 | -- Metadata kind 22 281 | parseMetadataKinds :: [NBitCode] -> LLVMReader () 282 | parseMetadataKinds = mapM_ parseMetadataKind . records 283 | where parseMetadataKind :: (MD.Metadata,[BC.Val]) -> LLVMReader () 284 | parseMetadataKind = \case 285 | (MD.METADATA_KIND, (idx:vals)) -> tellMetadataKind ((fromIntegral idx), map toEnum' vals) 286 | _ -> pure () -- ignore. 287 | -- Metadata 15 288 | parseMetadata :: [NBitCode] -> LLVMReader () 289 | parseMetadata = mapM_ parseMetadata . records 290 | where parseMetadata :: (MD.Metadata, [BC.Val]) -> LLVMReader () 291 | parseMetadata = \case 292 | (MD.METADATA_STRING, vals) -> tellMetadata $ MDString (map toEnum' vals) 293 | (MD.METADATA_VALUE, [tyId,val]) -> askType tyId >>= \ty -> tellMetadata $ MDValue ty val 294 | (MD.METADATA_NODE, mdIds) -> tellMetadata =<< MDNode <$> mapM askMetadata (map pred mdIds) 295 | (MD.METADATA_NAME, vals) -> tellMetadata $ MDName (map toEnum' vals) 296 | (MD.METADATA_DISTINCT_NODE, mdIds) -> tellMetadata =<< MDDistinctNode <$> mapM askMetadata (map pred mdIds) 297 | (MD.METADATA_LOCATION, [distinct, line, col, scope, inlinedAt]) -> tellMetadata $ MDLocation (distinct /= 0) line col scope inlinedAt 298 | -- this is such a weird encoding. 299 | -- basically emit the name. And then emit a named node, to use the just emitted name. 300 | (MD.METADATA_NAMED_NODE, mIds) -> popMetadata >>= \(MDName name) -> MDNamedNode name <$> mapM askMetadata mIds >>= tellMetadata 301 | (MD.METADATA_KIND, (idx:vals)) -> tellMetadataKind ((fromIntegral idx), map toEnum' vals) 302 | (c, ops) -> fail $ "Unsupported metadata: " ++ show c ++ " with ops: " ++ show ops 303 | 304 | -- * module codes 305 | 306 | parseVersion :: HasCallStack => [BC.Val] -> Word64 307 | parseVersion [v] = v 308 | 309 | parseTriple :: HasCallStack => [BC.Val] -> String 310 | parseTriple = map toEnum' 311 | 312 | parseDataLayout :: HasCallStack => [BC.Val] -> String 313 | parseDataLayout = map toEnum' 314 | 315 | parseGlobalVar :: HasCallStack => [BC.Val] -> LLVMReader () 316 | parseGlobalVar vals 317 | | length vals < 6 = error $ "Invalid record: Global Var must have at least six operands. " ++ show vals ++ " given." 318 | | [ ptrTyId, isConst, initId, linkage, paramAttrId, section ] <- vals = do 319 | ty <- askType ptrTyId 320 | unless (testBit isConst 1) $ fail "non-explicit type global vars are not (yet) supported" 321 | let addressSpace = shift isConst (-2) 322 | initVal = if initId /= 0 then Just (Unnamed undefined ty (FwdRef (initId -1))) else Nothing 323 | linkage' = toEnum' linkage 324 | storageClass = upgradeDLLImportExportLinkage linkage' 325 | comdat = 0 -- XXX. the Reader does some weird hasImplicitComdat for older bitcode. 326 | -- we'll ignore this here for now. 327 | tellValue $ Global (Ptr 0 ty) (testBit isConst 0) addressSpace initVal 328 | linkage' paramAttrId section Visibility.Default ThreadLocalMode.NotThreadLocal 329 | False False storageClass comdat 330 | | [ ptrTyId, isConst, initId, linkage 331 | , paramAttrId, section, visibility, threadLocalMode 332 | , unnamedAddr, externallyInitialized, storageClass 333 | , comdat ] <- vals = do 334 | ty <- askType ptrTyId 335 | -- TODO: isConst has bit 0 set if const. bit 1 if explicit type. We only handle explicit type so far. 336 | unless (testBit isConst 1) $ fail "non-explicit type global vars are not (yet) supported" 337 | let addressSpace = shift isConst (-2) 338 | initVal = if initId /= 0 then Just (Unnamed undefined ty (FwdRef (initId - 1))) else Nothing 339 | 340 | tellValue $ Global (Ptr 0 ty) (testBit isConst 0) addressSpace initVal 341 | (toEnum' linkage) paramAttrId section (toEnum' visibility) (toEnum' threadLocalMode) 342 | (unnamedAddr /= 0) (externallyInitialized /= 0) (toEnum' storageClass) comdat 343 | | otherwise = error $ "unhandled parseGlobalVar with values: " ++ (show vals) 344 | 345 | -- TODO: if less than eight values -> Invalid record. 346 | -- if type can not be reconstructed, invalid record. 347 | -- if ty can not be cast to function type. -> invalid value 348 | 349 | parseFunctionDecl :: HasCallStack => [BC.Val] -> LLVMReader () 350 | parseFunctionDecl vals = askVersion >>= \case 351 | 1 -> parseFunctionDecl' vals 352 | 2 -> parseFunctionDecl' (drop 2 vals) 353 | 354 | parseFunctionDecl' :: HasCallStack => [BC.Val] -> LLVMReader () 355 | parseFunctionDecl' 356 | [ tyId, cconv, isProto, linkage -- 4 357 | , paramAttrId, alignment, section, visibility, gc -- 9 358 | , unnamedAddr, prologueDataId, storageClass, comdat -- 13 359 | , prefixDataId, personality ] -- 15 360 | = do 361 | ty <- askType tyId 362 | let prologueData = if prologueDataId /= 0 then Just (Unnamed undefined ty (FwdRef (prologueDataId -1))) else Nothing 363 | prefixData = if prefixDataId /= 0 then Just (Unnamed undefined ty (FwdRef (prefixDataId -1))) else Nothing 364 | tellValue $ V.Function (Ptr 0 ty) (toEnum' cconv) (toEnum' linkage) 365 | paramAttrId alignment section (toEnum' visibility) gc 366 | (unnamedAddr /= 0) (toEnum' storageClass) 367 | comdat personality (FE (isProto /= 0) prologueData prefixData) 368 | 369 | parseFunctionDecl' vs = fail $ "Failed to parse functiond decl from " ++ show (length vs) ++ " values " ++ show vs 370 | 371 | upgradeDLLImportExportLinkage :: Linkage.Linkage -> DLLStorageClass.DLLStorageClass 372 | upgradeDLLImportExportLinkage = \case 373 | Linkage.WeakODR -> DLLStorageClass.DLLImport 374 | Linkage.Appending -> DLLStorageClass.DLLExport 375 | _ -> DLLStorageClass.Default 376 | 377 | parseAlias :: HasCallStack 378 | => Bool -- ^ New Alias 379 | -> [BC.Val] -- Values 380 | -> LLVMReader () 381 | 382 | -- old alias, without addrSpace explicitly given. 383 | parseAlias False [ tyId, valId, linkage, visibility, storageClass, threadLocalMode, unnamedAddr ] = do 384 | nVals <- length <$> askValueList 385 | nTypes <- length <$> askTypeList 386 | ty@(Ptr addrSpace _) <- askType tyId 387 | -- val <- askValue valId 388 | tellValue $ Alias ty addrSpace (Unnamed undefined undefined (V.FwdRef valId)) (toEnum' linkage) (toEnum' visibility) (toEnum' threadLocalMode) (unnamedAddr /= 0) (toEnum' storageClass) 389 | parseAlias False [ tyId, valId, linkage, visibility, storageClass, threadLocalMode ] 390 | = parseAlias False [ tyId, valId, linkage, visibility, storageClass, threadLocalMode, 0 ] 391 | parseAlias False [ tyId, valId, linkage, visibility, storageClass ] 392 | = parseAlias False [ tyId, valId, linkage, visibility, storageClass, (fromIntegral (fromEnum ThreadLocalMode.NotThreadLocal)) ] 393 | parseAlias False [ tyId, valId, linkage, visibility ] 394 | = parseAlias False [ tyId, valId, linkage, visibility, (fromIntegral . fromEnum $ upgradeDLLImportExportLinkage (toEnum' linkage)) ] 395 | parseAlias False [ tyId, valId, linkage ] 396 | = parseAlias False [ tyId, valId, linkage, (fromIntegral (fromEnum Visibility.Default)) ] 397 | -- new 398 | parseAlias True [ tyId, addrSpace, valId, linkage, visibility, storageClass, threadLocalMode, unnamedAddr ] = do 399 | ty <- askType tyId 400 | val <- askValue ty valId 401 | tellValue $ Alias ty addrSpace val (toEnum' linkage) (toEnum' threadLocalMode) (toEnum' visibility) (unnamedAddr /= 0) (toEnum' storageClass) 402 | parseAlias True [ tyId, addrSpace, valId, linkage, visibility, storageClass, threadLocalMode ] 403 | = parseAlias True [ tyId, addrSpace, valId, linkage, visibility, storageClass, threadLocalMode, 0 ] 404 | parseAlias True [ tyId, addrSpace, valId, linkage, visibility, storageClass ] 405 | = parseAlias True [ tyId, addrSpace, valId, linkage, visibility, storageClass, (fromIntegral (fromEnum ThreadLocalMode.NotThreadLocal)) ] 406 | parseAlias True [ tyId, addrSpace, valId, linkage, visibility ] 407 | = parseAlias True [ tyId, addrSpace, valId, linkage, visibility, (fromIntegral . fromEnum $ upgradeDLLImportExportLinkage (toEnum' linkage)) ] 408 | parseAlias True [ tyId, addrSpace, valId, linkage ] 409 | = parseAlias True [ tyId, addrSpace, valId, linkage, (fromIntegral (fromEnum Visibility.Default)) ] 410 | 411 | -- helper 412 | toEnum' :: (HasCallStack, Integral a, Enum e) => a -> e 413 | toEnum' = toEnum . fromIntegral 414 | 415 | parseTopLevel :: HasCallStack => [NBitCode] -> LLVMReader (Maybe Ident, Module) 416 | parseTopLevel bs = do 417 | ident <- case lookupBlock IDENTIFICATION bs of 418 | Just b -> Just <$> parseIdent b 419 | Nothing -> return Nothing 420 | 421 | let Just moduleBlock = lookupBlock MODULE bs 422 | mod <- parseModule moduleBlock 423 | return (ident, mod) 424 | 425 | resolveFwdRefs :: HasCallStack => [Symbol] -> [Symbol] 426 | resolveFwdRefs ss = map (fmap' resolveFwdRef') ss 427 | where 428 | -- TODO: Maybe Symbol should be more generic? Symbol a, 429 | -- then we could have Functor Symbol. 430 | fmap' :: (Value -> Value) -> Symbol -> Symbol 431 | fmap' f (Named s i t v) = let v' = f v in Named s i (ty v) v 432 | fmap' f (Unnamed i t v) = let v' = f v in Unnamed i (ty v) v 433 | resolveFwdRef' :: Value -> Value 434 | resolveFwdRef' g@(Global{..}) = case gInit of 435 | Just s | (FwdRef id) <- symbolValue s -> g { gInit = Just $ (ss !! (fromIntegral id)) } 436 | _ -> g 437 | -- resolve fws refs only for globals for now. 438 | resolveFwdRef' x = x 439 | 440 | -- | Parse a module from a set of blocks (the body of the module) 441 | parseModule :: HasCallStack => [NBitCode] -> LLVMReader Module 442 | parseModule bs = do 443 | let Just version = parseVersion <$> lookupRecord VERSION bs 444 | triple = parseTriple <$> lookupRecord TRIPLE bs 445 | layout = parseDataLayout <$> lookupRecord DATALAYOUT bs 446 | vst = parseSymbolValueTable <$> lookupBlock VALUE_SYMTAB bs 447 | 448 | tellVersion version 449 | trace "Parsing Blocks" 450 | flip mapM_ bs $ \case 451 | (NBlock c bs') -> parseModuleBlock (toEnum (fromIntegral c), bs') 452 | (NRec c vs) -> parseModuleRecord (toEnum (fromIntegral c), vs) 453 | 454 | trace "Parsing VST" 455 | -- update values with symbols 456 | case vst of 457 | Just vst -> tellValueSymbolTable vst 458 | Nothing -> pure () 459 | 460 | -- update forward references 461 | resolveFwdRefs <$> askValueList >>= tellValueList 462 | 463 | -- obtain a snapshot of all current values 464 | values <- askValueList 465 | 466 | trace "Parsing Decls" 467 | 468 | let functionDefs = [f | f@(Named _ _ _ (V.Function {..})) <- values, not (feProto fExtra)] ++ 469 | [f | f@(Unnamed _ _ (V.Function {..})) <- values, not (feProto fExtra)] 470 | functionDecl = [f | f@(Named _ _ _ (V.Function {..})) <- values, feProto fExtra ] ++ 471 | [f | f@(Unnamed _ _ (V.Function {..})) <- values, feProto fExtra ] 472 | (unless (length functionDefs == length functionBlocks)) $ fail $ "#functionDecls (" ++ show (length functionDefs) ++ ") does not match #functionBodies (" ++ show (length functionBlocks) ++ ")" 473 | 474 | trace "Parsing Functions" 475 | 476 | fns <- mapM parseFunction (zip functionDefs functionBlocks) 477 | 478 | let functionDefs = map dSig fns 479 | 480 | typeSet <- askTypeList 481 | 482 | let isConstant x 483 | | (V.Constant{}) <- V.symbolValue x = True 484 | | otherwise = False 485 | 486 | 487 | constsSet <- filter isConstant <$> askValueList 488 | 489 | return $ Module version triple layout values functionDecl functionDefs fns constsSet typeSet 490 | where 491 | functionBlocks :: [[NBitCode]] 492 | functionBlocks = [bs' | (B.FUNCTION, bs') <- blocks bs ] 493 | symbolize :: [(Int, ValueSymbolEntry)] -> [Value] -> [Symbol] 494 | symbolize m = map (\(idx, val) -> case (lookup idx m) of 495 | Just (Entry s) -> Named s undefined (ty val) val 496 | Just (FnEntry _ s) -> Named s undefined (ty val) val 497 | Nothing -> Unnamed undefined (ty val) val 498 | ) . zip [0..] 499 | 500 | -- | Parse value symbol table 501 | parseSymbolValueTable :: HasCallStack => [NBitCode] -> ValueSymbolTable 502 | parseSymbolValueTable = foldl (\l x -> parseSymbolValue x:l) [] . filter f . records 503 | where parseSymbolValue :: (ValueSymtabCodes, [BC.Val]) -> (Int, ValueSymbolEntry) 504 | parseSymbolValue (VST_CODE_ENTRY, (idx:vs)) = (fromIntegral idx, Entry $ map toEnum' vs) 505 | parseSymbolValue (VST_CODE_FNENTRY, (idx:offset:vs)) = (fromIntegral idx, FnEntry (fromIntegral offset) $ map toEnum' vs) 506 | f :: (ValueSymtabCodes, [BC.Val]) -> Bool 507 | f (VST_CODE_ENTRY, _) = True 508 | f (VST_CODE_FNENTRY, _) = True 509 | f _ = False 510 | 511 | -- block ids 512 | parseModuleBlock :: HasCallStack => (ModuleBlockID, [NBitCode]) -> LLVMReader () 513 | parseModuleBlock (id,bs) = trace ("parseModuleBlock " ++ show id) >> case (id,bs) of 514 | ({- 9 -}PARAMATTR, bs) -> parseAttr bs 515 | ({- 10 -}PARAMATTR_GROUP, bs) -> parseAttrGroup bs 516 | ({- 11 -}CONSTANTS, bs) -> parseConstants bs 517 | ({- 12 -}B.FUNCTION, bs) -> return () -- parsing of function bodies is handled differently. 518 | ({- 13 -}IDENTIFICATION, bs) -> return () -- this is not even part of the MODULE block. But alongside the module block. 519 | ({- 14 -}VALUE_SYMTAB, bs) -> return () -- TODO 520 | ({- 15 -}B.METADATA, bs) -> parseMetadata bs 521 | ({- 16 -}METADATA_ATTACHMENT_ID, bs) -> return () -- TODO 522 | ({- 17 -}TYPE_NEW, bs) -> parseTypes bs 523 | ({- 18 -}USELIST, bs) -> return () -- TODO 524 | ({- 19 -}MODULE_STRTAB, bs) -> return () -- TODO 525 | ({- 20 -}FUNCTION_SUMMARY, bs) -> return () -- TODO 526 | ({- 21 -}OPERAND_BUNDLE_TAGS, bs) -> return () -- TODO 527 | ({- 22 -}B.METADATA_KIND, bs) -> parseMetadataKinds bs 528 | ({- 23 -}STRTAB, bs) -> return () -- TODO 529 | ({- 24 -}FULL_LTO_GLOBAL_SUMMARY, bs) -> return () -- TODO 530 | ({- 25 -}SYMTAB, bs) -> return () -- TODO 531 | ({- 26 -}SYNC_SCOPE_NAMES, bs) -> return () -- TODO 532 | c -> fail $ "Encountered unhandled block: " ++ show c 533 | 534 | parseModuleRecord :: HasCallStack => (ModuleCode, [BC.Val]) -> LLVMReader () 535 | parseModuleRecord (id,bs) = trace ("parseModuleRecord " ++ show id) >> case (id,bs) of 536 | ({- 1 -}VERSION, _) -> pure () -- ignore, it's being picked apart somewhere else. 537 | ({- 2 -}TRIPLE, _) -> pure () -- ignore 538 | ({- 3 -}DATALAYOUT, _) -> pure () -- ignore 539 | -- ({- 4 -}ASM, asm) -> -- unhandled 540 | ({- 5 -}SECTIONNAME, name) -> trace $ "!! ignoring section name " ++ (map toEnum' name) 541 | -- ({- 6 -}DEPLIB, name) -- unhanlded, will be removed in 4.0 anyway. 542 | ({- 7 -}GLOBALVAR, vs) -> parseGlobalVar vs 543 | ({- 8 -}M.FUNCTION, vs) -> parseFunctionDecl vs 544 | ({- 9 -}ALIAS_OLD, vs) -> parseAlias False vs 545 | -- ({- 10 -}PURGEVALS, numvals) -- unhandled; no idea how to implement this without chaning to stream processing of the blocks. 546 | -- ({- 11 -}GCNAME, name) -- unhandled 547 | -- ({- 12 -}COMDAT, [ sectionKind, name ]) -- unhandled 548 | -- as we do not jump to the VST, we can safely ignore it here. 549 | ({- 13 -}VSTOFFSET, [ offset ]) -> trace $ "!! ignoring VSTOffset " ++ show offset 550 | ({- 14 -}ALIAS, vs) -> parseAlias True vs 551 | -- ({- 15 -}METADATA_VALUES, numvals) 552 | -- ignore others; e.g. we only need to parse the ones above in sequence to populate the valuetable properly. 553 | ({- 16 -}SOURCE_FILENAME, name) -> trace $ "!! ignoring source filename " ++ (map toEnum' name) 554 | ({- 17 -}HASH, vs) -> trace $ "!! ignoring hash " ++ show vs 555 | -- ({- 18 -}IFUNC, [ valty, addrspace, resolverval, link, visibility ]) 556 | (id,ops) -> fail $ "Encountered unhandled record: " ++ show id ++ " with ops: " ++ show ops 557 | 558 | 559 | -- | parsing a function block from bitcode. 560 | -- function can contain their own set of 561 | -- constants which are virtually added to 562 | -- the values table. Similarly they have their 563 | -- arguments put into the values table before 564 | -- the body is parsed. 565 | -- 566 | -- The LLVM Reader makes be believe, we can 567 | -- expect to see the instruction records as 568 | -- well as the following blocks: 569 | -- Constants, VST, MetadataAttachment, Metadata, 570 | -- Uselist. 571 | -- 572 | -- So a Function consists of 573 | -- - Constants (with maybe VST info) 574 | -- - MetadataAttachment, Metadata -- let's ignore this for now. 575 | -- - Uselist (?) 576 | -- - [Instructions] -- where we basically need to use a temporary 577 | -- ValueList = GlobalValueList + Constants + Function Arguments. 578 | -- and reset it at the end of the function. 579 | 580 | -- Function bodies should come in sequence of their declaration in the GV. 581 | -- prototype functions are external. 582 | -- 583 | parseFunction :: HasCallStack => (Symbol, [NBitCode]) -> LLVMReader F.Function 584 | parseFunction (f@(Named _ _ _ V.Function{..}), b) = do 585 | -- remember the size of the value list. We need to trim it back down after 586 | -- parsing; and might want to attach the new values to the constants of the Function. 587 | -- The same holds for metadata attachment. 588 | savedValueList <- askValueList 589 | savedVST <- askValueSymbolTable 590 | -- Not sure what we do about Uselist yet. 591 | let Ptr _ (T.Function _ _ paramTys) = fType 592 | -- put the decl header onto the valuelist. 593 | mapM_ tellValue (zipWith Arg paramTys [0..]) 594 | nVals' <- length <$> askValueList 595 | -- let's parse all constants if any. 596 | mapM_ parseFunctionBlock (blocks b) 597 | 598 | case parseSymbolValueTable <$> lookupBlock VALUE_SYMTAB b of 599 | Just vst -> tellValueSymbolTable vst 600 | Nothing -> pure () 601 | 602 | consts <- drop nVals' . resolveFwdRefs <$> askValueList 603 | -- parse the instructions 604 | -- the first basic block is going to be empty. As the body 605 | -- has to finish with a terminator, which adds a final empty 606 | -- BB to the front. 607 | (_:bbs,_) <- foldM foldHelper ([BasicBlock []],[]) (records b) 608 | -- reset the valueList to before we entered the 609 | -- function body, as they were local to 610 | tellValueList savedValueList 611 | tellValueSymbolTable savedVST 612 | return $ F.Function f consts (reverse bbs) 613 | 614 | parseFunction ((Unnamed i t f), b) = parseFunction ((Named "dummy" i t f), b) 615 | parseFunction _ = fail "Invalid arguments" 616 | 617 | 618 | parseFunctionBlock :: HasCallStack => (ModuleBlockID, [NBitCode]) -> LLVMReader () 619 | parseFunctionBlock = \case 620 | (CONSTANTS, b) -> parseConstants b 621 | (B.METADATA, b) -> parseMetadata b 622 | (B.METADATA_ATTACHMENT_ID, b) -> trace ("Ignoring Metadata attachment: " ++ show b) 623 | (B.USELIST, b) -> trace ("Cannot parse uselist yet (" ++ show b ++ ")") >> return () 624 | _ -> pure () 625 | 626 | getVal :: (HasCallStack, Integral a) => a -> LLVMReader Symbol 627 | getVal n = do 628 | valueList <- askValueList 629 | let idx = fromIntegral n 630 | if idx < 0 || idx > length valueList 631 | then fail $ "index " ++ show idx ++ " out of range [0, " ++ show (length valueList) ++ ") of available values." 632 | else pure (valueList !! idx) 633 | 634 | getVal' :: (HasCallStack, Integral a) => Ty -> a -> LLVMReader Symbol 635 | getVal' t n = do 636 | val <- getVal n 637 | if (ty val) == t 638 | then return val 639 | else do valueList <- askValueList 640 | fail $ show val ++ " (" ++ show (fromIntegral n) ++ ") doesn't have type " ++ show t 641 | 642 | getRelativeVal :: (HasCallStack, Integral a) => [Symbol] -> a -> LLVMReader Symbol 643 | getRelativeVal refs n = do 644 | valueList <- askValueList 645 | let lst = reverse (valueList ++ refs) 646 | idx = fromIntegral n - 1 647 | if idx < 0 || idx > length lst 648 | then fail $ "index " ++ (show idx) ++ " out of range [0, " ++ show (length lst) ++ ") of avaliable relative values." 649 | else pure (lst !! idx) 650 | 651 | getRelativeValWithType :: (HasCallStack, Integral a) => Ty -> [Symbol] -> a -> LLVMReader Symbol 652 | getRelativeValWithType ty refs n = do 653 | val <- getRelativeVal refs n 654 | if (T.ty val) == ty 655 | then return val 656 | else do valueList <- askValueList 657 | let lst = reverse (valueList ++ refs) 658 | idx = fromIntegral n - 1 659 | fail $ show val ++ " (" ++ show idx ++ ") doesn't have type " ++ show ty ++ "\nvalues\n" ++ unlines (map show lst) 660 | 661 | -- TODO: filter out the `FUNC_CODE_DECLAREBLOCKS` in 662 | -- the foldHelper. We can then simplify the 663 | -- parseInst function to be of result type 664 | -- LLVMReader Inst. 665 | foldHelper :: ([BasicBlock],[Symbol]) -> (Instruction, [BC.Val]) -> LLVMReader ([BasicBlock],[Symbol]) 666 | foldHelper s@((BasicBlock insts):bbs,vs) instr = do 667 | i <- parseInst vs instr 668 | case i of 669 | Nothing -> return s 670 | Just i -> do let mref = (\v -> Unnamed undefined (ty v) v) . flip TRef (length vs) <$> instTy i 671 | vs' = vs ++ [r | Just r <- [mref]] 672 | insts' = insts ++ [(mref, i)] 673 | bbs' = (BasicBlock insts'):bbs 674 | case isTerminator i of 675 | True -> return ((BasicBlock []):bbs', vs') 676 | False -> return (bbs', vs') 677 | 678 | parseInst :: HasCallStack => [Symbol] -> (Instruction, [BC.Val]) -> LLVMReader (Maybe Inst) 679 | parseInst rs = \case 680 | -- 1 681 | (DECLAREBLOCKS, x) | length x == 0 -> error "Invalid record: DECLAREBLOCKS must not be empty!" 682 | | x == [0] -> error "Invalid record: DECLAREBLOCKS must not be 0." 683 | | otherwise -> pure Nothing -- ignore. 684 | -- 2 685 | (INST_BINOP, (lhs:rhs:code:flags)) -> traceShow flags $ do 686 | lhs <- getRelativeVal rs lhs 687 | rhs <- getRelativeVal rs rhs 688 | unless ((ty lhs) == (ty rhs)) $ pure $ error "Invalid record: BINOP, LHS and RHS types do not agree." 689 | let opTy = ty (symbolValue lhs) 690 | code' = (toEnum' code) :: BinOp 691 | flags' = case flags of 692 | [] -> [] 693 | [bitfield] 694 | | code' `elem` [ADD, SUB, MUL, SHL] -> map Flags.Overflow $ filter (testBit bitfield . fromEnum) [Flags.NO_UNSIGNED_WRAP, Flags.NO_SIGNED_WRAP] 695 | | code' `elem` [UDIV, SDIV, LSHR, ASHR] -> map Flags.Exact $ filter (testBit bitfield . fromEnum) [Flags.EXACT] 696 | | otherwise -> [] 697 | _ -> error "Invalid record: At most one FLAG value allowed for BINOP." 698 | 699 | return $ Just (I.BinOp opTy code' lhs rhs flags') 700 | -- 3 701 | (INST_CAST, [ valId, tyId, opCode ]) -> do 702 | val <- getRelativeVal rs valId 703 | ty <- askType tyId 704 | let op = toEnum' opCode 705 | -- TODO: if not ty or Opc = -1 -> Invalid Record 706 | return $ Just (I.Cast ty op val) 707 | -- 4 708 | -- (INST_GEP_OLD, vals) 709 | -- 5 710 | -- (INST_SELECT, vals) 711 | -- 6 712 | -- (INST_EXTRACTELT, vals) 713 | -- 7 714 | -- (INST_INSERTELT, vals) 715 | -- 8 716 | -- (INST_SHUFFLEVEC, vals) 717 | -- 9 718 | -- (INST_CMP, vals) 719 | -- 10 720 | -- Even thought the documentaiton sais [ty [, val]], it's 721 | -- actually [val] (or [val, ty] in case of fwd ref). 722 | -- if [val] is empty. It' a Void return. 723 | (INST_RET, []) -> return . Just $ I.Ret Nothing 724 | (INST_RET, [valId]) -> do 725 | val <- Just <$> getRelativeVal rs valId 726 | return . Just $ I.Ret val 727 | (INST_RET, _) -> error "Invalid record: INST_RET can only have none or one op." 728 | -- 11 729 | (INST_BR, [bbN]) -> return . Just $ UBr bbN 730 | (INST_BR, [bbN, bbN', cond]) -> do 731 | cond' <- getRelativeVal rs cond 732 | return . Just $ Br cond' bbN bbN' 733 | (INST_BR, _) -> error "Invalid record: INST_BR can only have one or three ops." 734 | -- 12 735 | (INST_SWITCH, (opTy:cond:defaultBlock:cases)) -> do 736 | ty <- askType opTy 737 | cond' <- getRelativeVal rs cond 738 | Just . Switch cond' defaultBlock <$> parseCase ty cases 739 | where 740 | parseCase :: Ty -> [BC.Val] -> LLVMReader [(Symbol, BasicBlockId)] 741 | parseCase ty [] = pure [] 742 | parseCase ty (valId:blockId:cases) = (:) <$> ((,blockId) <$> getVal' ty valId) <*> parseCase ty cases 743 | -- 13 744 | -- (INST_INVOKE, vals) 745 | -- 14 - Unused 746 | -- 15 747 | -- (INST_UNREACHABLE, []) 748 | -- 16 749 | -- (INST_PHI, (ty:val:[bbs])) 750 | -- 17, 18 - Unused 751 | -- 19 752 | (INST_ALLOCA, [ instty, opty, op, align ]) -> do 753 | iTy <- askType instty 754 | oTy <- askType opty 755 | val <- askValue oTy op -- probably a constant. 756 | unless (oTy == ty val) $ pure $ error "Invalid record" 757 | return . Just $ Alloca (Ptr 0 iTy) val (decodeAlign align) 758 | where decodeAlign :: Word64 -> Word64 759 | decodeAlign a = 2^((a .&. (complement inAllocMask .|. explicitTypeMask .|. swiftErrorMask)) - 1) 760 | inAllocMask = shift 1 5 761 | explicitTypeMask = shift 1 6 762 | swiftErrorMask = shift 1 7 763 | (INST_ALLOCA, _) -> error "Invalid record: ALLOCA expects exactly four ops!" 764 | -- 20 765 | (INST_LOAD, [ op, opty, align, vol]) -> do 766 | oTy <- askType opty 767 | val <- getRelativeVal rs op 768 | return . Just $ Load oTy val (2^(align-1)) 769 | -- 21, 22 - Unused 770 | -- 23 771 | -- (INST_VAARG, [ valistty, valist, instty ]) 772 | -- 24 773 | -- (INST_STORE_OLD [ ptrty, ptr, val, align, vol]) 774 | -- 25 - Unused 775 | -- 26 776 | (INST_EXTRACTVAL, (op:idxs)) -> do 777 | val <- getRelativeVal rs op 778 | return . Just $ ExtractValue val idxs 779 | -- 27 780 | -- (INST_INSERTVAL, ops) 781 | -- 28 782 | (INST_CMP2, [lhs, rhs, pred]) -> do 783 | lhs' <- getRelativeVal rs lhs 784 | rhs' <- getRelativeVal rs rhs 785 | unless (ty lhs' == ty rhs') $ pure $ error "Invalid record: CMP2 lhs and rhs types do not agree." 786 | -- result type is: 787 | -- if lhs is vector of n -> Vector 788 | -- else -> i1 789 | let oTy = case (ty lhs') of 790 | T.Vector n _ -> T.Vector n (T.Int 1) 791 | _ -> T.Int 1 792 | return . Just $ Cmp2 oTy lhs' rhs' (toEnum' pred) 793 | -- 29 794 | -- (INST_VSELECT, [ ty, opval, opval, predty, pred]) 795 | -- 30 796 | -- (INST_INBOUNDS_GEP_OLD, ops) 797 | -- 31 798 | -- (INST_INDIRECTBR, (opty:ops)) 799 | -- 32 - Unused 800 | -- 33 801 | -- (DEBUG_LOC_AGAIN, []) 802 | -- 34 803 | -- [paramattrs, cc[, fmf][, explfnty], fnid, arg0, arg1...] 804 | (INST_CALL, (paramattr:cc:ops)) -> do 805 | let (fmf, ops') = if testBit cc (fromEnum Flags.CALL_FMF) then (Just (head ops), tail ops) else (Nothing, ops) 806 | let (explFnTy, ops') = if testBit cc (fromEnum Flags.CALL_EXPLICIT_TYPE) then (Just (head ops), tail ops) else (Nothing, ops) 807 | let tailCallKind | testBit cc (fromEnum Flags.CALL_TAIL) = Tail 808 | | testBit cc (fromEnum Flags.CALL_MUSTTAIL) = MustTail 809 | | testBit cc (fromEnum Flags.CALL_NOTAIL) = NoTail 810 | | otherwise = None 811 | -- cconv is encoded in the bits 1 to 11. 812 | let cconv = toEnum' (shift (cc .&. 0x7ff) (-1 * fromEnum Flags.CALL_CCONV)) 813 | let (fnid:args) = ops' 814 | fn <- getRelativeVal rs fnid 815 | 816 | fnTy <- case explFnTy of 817 | Just ty -> askType ty 818 | Nothing -> pure $ tePointeeTy (fType (symbolValue fn)) 819 | 820 | args <- mapM (getRelativeVal rs) args 821 | return . Just $ Call (teRetTy fnTy) tailCallKind cconv fn fnTy args 822 | -- 35 823 | -- (DEBUG_LOC) 824 | -- 36 825 | (INST_FENCE, [ordering, synchscope]) -> do 826 | return . Just $ Fence (decodeOrdering ordering) (decodeSynchScope synchscope) 827 | where 828 | decodeOrdering :: BC.Val -> AtomicOrdering 829 | decodeOrdering = toEnum' 830 | decodeSynchScope :: BC.Val -> AtomicSynchScope 831 | decodeSynchScope = toEnum' 832 | 833 | -- 37 834 | -- (INST_CMPXCHG_OLD, [ptrty, ptr, cmp, new, align, vol, ordering, synchscope]) 835 | -- 38 836 | (INST_ATOMICRMW, [ ptr, val, op, vol, ordering, synchscope]) -> do 837 | ref <- getRelativeVal rs ptr 838 | val <- getRelativeVal rs val 839 | return . Just $ AtomicRMW ref val (toEnum' op) (decodeOrdering ordering) (decodeSynchScope synchscope) 840 | where 841 | decodeOrdering :: BC.Val -> AtomicOrdering 842 | decodeOrdering = toEnum' 843 | decodeSynchScope :: BC.Val -> AtomicSynchScope 844 | decodeSynchScope = toEnum' 845 | 846 | -- 39 847 | -- (INST_RESUME, [opval]) 848 | -- 40 849 | -- (INST_LANDINGPAD_OLD, [ty, val, val, num, id0, val0, ...]) 850 | -- 41 851 | (INST_LOADATOMIC, [ ptr, opty, align, vol, ordering, synchscope]) -> do 852 | oTy <- askType opty 853 | ref <- getRelativeVal rs ptr 854 | return . Just $ AtomicLoad oTy ref (2^(align-1)) (toEnum' ordering) (toEnum' synchscope) 855 | -- 42 856 | -- (INST_STOREATOMIC_OLD, [ptrty, ptr, val, align, vol, odering, synchscope]) 857 | -- 43 858 | -- TODO: also GEP_OLD, and INBOUNDS_GEP_OLD, same parse though. 859 | (INST_GEP, (inbounds:opty:vs)) -> do 860 | oTy <- askType opty 861 | (val:idxs) <- mapM (getRelativeVal rs) vs 862 | return . Just $ I.Gep (lift oTy) (inbounds /= 0) val idxs 863 | -- 44 864 | (INST_STORE, [ ptr, val, align, vol ]) -> do 865 | ref <- getRelativeVal rs ptr 866 | val <- getRelativeVal rs val 867 | return . Just $ Store ref val (2^(align-1)) 868 | -- 45 869 | (INST_STOREATOMIC, [ ptr, val, align, vol, ordering, synchscope ]) -> do 870 | ref <- getRelativeVal rs ptr 871 | val <- getRelativeVal rs val 872 | return . Just $ AtomicStore ref val (2^(align-1)) (toEnum' ordering) (toEnum' synchscope) 873 | -- 46 874 | (INST_CMPXCHG, [ ptr, cmp, new, vol, ordering, synchscope, failureOrdering, weak ]) -> do 875 | ref <- getRelativeVal rs ptr 876 | cmp <- getRelativeVal rs cmp 877 | new <- getRelativeVal rs new 878 | unless (ty cmp == ty new) $ pure $ error "Invalid record: CMP2 lhs and rhs types do not agree." 879 | 880 | return . Just $ CmpXchg ref cmp new (decodeOrdering ordering) 881 | (decodeSynchScope synchscope) 882 | (decodeOrdering failureOrdering) 883 | where 884 | decodeOrdering :: BC.Val -> AtomicOrdering 885 | decodeOrdering = toEnum' 886 | decodeSynchScope :: BC.Val -> AtomicSynchScope 887 | decodeSynchScope = toEnum' 888 | -- 47 889 | -- (INST_LANDINGPAD, [ ty, val, num, id0, val0, ...]) 890 | -- 48 891 | -- (INST_CLEANUPRET, [val]) 892 | -- (INST_CLEANUPRET, [val, bb#]) 893 | -- 49 894 | -- (INST_CATCHRET, [val, bb#]) 895 | -- 50 896 | -- (INST_CATCHPAD, [bb#, bb#, num, args...]) 897 | -- 51 898 | -- (INST_CLEANUPPAD, [num, args...]) 899 | -- 52 900 | -- (INST_CATCHSWITCH, [num, args...]) 901 | -- (INST_CATCHSWITCH, [num, args..., bb]) 902 | -- 53, 54 - Unused 903 | -- (OPERAND_BUNDLE, vals) 904 | -- ignore all other instructions for now. 905 | r -> fail $ "Encountered unhandled instruction " ++ show r 906 | 907 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Function.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RecordWildCards #-} 2 | {-# LANGUAGE FlexibleInstances #-} 3 | {-# LANGUAGE TypeSynonymInstances #-} 4 | {-# LANGUAGE DeriveGeneric #-} 5 | 6 | module Data.BitCode.LLVM.Function where 7 | 8 | import Data.BitCode.LLVM.Types 9 | 10 | import Data.BitCode.LLVM.Value (Symbol, HasLinkage(..)) 11 | import Data.BitCode.LLVM.Instruction (Inst) 12 | import Data.Binary (Binary) 13 | 14 | import GHC.Generics (Generic) 15 | 16 | -- | Function declarations are set of so called basic blocks, 17 | -- which contain sets of instructions. These blocks may have 18 | -- labels. 19 | type BlockInst = (Maybe Symbol, Inst) 20 | 21 | imap :: (Inst -> Inst) -> BlockInst -> BlockInst 22 | imap f (x, y) = (x, f y) 23 | 24 | -- TODO: Reused Named here? 25 | data NamedBlock x 26 | = BasicBlock x 27 | | NamedBlock Label x 28 | deriving (Show, Eq, Generic) 29 | 30 | instance Functor NamedBlock where 31 | fmap f (BasicBlock x) = BasicBlock (f x) 32 | fmap f (NamedBlock l x) = NamedBlock l (f x) 33 | 34 | type BasicBlock = NamedBlock [BlockInst] 35 | 36 | bbmap :: ([BlockInst] -> [BlockInst]) -> BasicBlock -> BasicBlock 37 | bbmap f (BasicBlock bi) = (BasicBlock (f bi)) 38 | bbmap f (NamedBlock n bi) = (NamedBlock n (f bi)) 39 | 40 | bimap :: (BlockInst -> BlockInst) -> BasicBlock -> BasicBlock 41 | bimap f (BasicBlock bi) = (BasicBlock (map f bi)) 42 | bimap f (NamedBlock n bi) = (NamedBlock n (map f bi)) 43 | 44 | -- | Function definitions. 45 | -- TODO: dSig is somewhat ugly, I'd lke enforce only function values here. 46 | data Function = Function { dSig :: Symbol, dConst :: [Symbol], dBody :: [BasicBlock] } 47 | deriving (Show, Eq, Generic) 48 | 49 | instance HasLinkage Function where 50 | getLinkage = getLinkage . dSig 51 | setLinkage l f = f { dSig = setLinkage l (dSig f) } 52 | 53 | fbmap :: (BasicBlock -> BasicBlock) -> Function -> Function 54 | fbmap f x@(Function{..}) = x { dBody = map f dBody } 55 | 56 | -- instance Binary a => Binary (NamedBlock a) 57 | -- instance Binary Function 58 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/IDs/Blocks.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.IDs.Blocks where 2 | -- | The only top-level block type defined is for a module. 3 | data ModuleBlockID 4 | -- | Skip the first 8 blocks. To make the offset at FIRST_APPLICATION_BLOCKID (8). 5 | = SKIP_0 | SKIP_1 | SKIP_2 | SKIP_3 | SKIP_4 | SKIP_5 | SKIP_6 | SKIP_7 6 | -- | Blocks 7 | | MODULE -- FIRST_APPLICATION_BLOCKID (8) 8 | -- | Module sub-block id's. 9 | | PARAMATTR -- 9 10 | | PARAMATTR_GROUP -- 10 11 | | CONSTANTS -- 11 12 | | FUNCTION -- 12 13 | -- | Block intended to contains information on the bitcode versioning. 14 | -- Can be used to provide better error messages when we fail to parse a 15 | -- bitcode file. 16 | | IDENTIFICATION -- 13 17 | | VALUE_SYMTAB -- 14 18 | | METADATA -- 15 19 | | METADATA_ATTACHMENT_ID -- 16 20 | | TYPE_NEW -- 17 21 | | USELIST -- 18 22 | | MODULE_STRTAB -- 19 23 | | FUNCTION_SUMMARY -- 20 24 | | OPERAND_BUNDLE_TAGS -- 21 25 | | METADATA_KIND -- 22 26 | | STRTAB -- 23 27 | | FULL_LTO_GLOBAL_SUMMARY -- 24 28 | | SYMTAB -- 24 29 | | SYNC_SCOPE_NAMES -- 25 30 | deriving (Show, Enum) 31 | 32 | -- x = ( Ident {iString = "APPLE_1_703.0.31_0", iEpoch = Current} 33 | -- , Module 34 | -- { mVersion = 1 35 | -- , mTriple = Just "x86_64-apple-macosx10.11.0" 36 | -- , mDatalayout = Just "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 37 | -- , mValues = [ Global {gPointerType = Array {teNumElts = 13, teEltTy = Int {teWidth = 8}}, gIsConst = True, gAddressSpace = 0, gInitId = Just 3, gLinkage = ExternalWeak, gParamAttrs = 1, gSection = 0, gVisibility = Default, gThreadLocal = NotThreadLocal, gUnnamedAddr = True, gExternallyInitialized = False, gDLLStorageClass = Default, gComdat = 0} 38 | -- , Function {fType = Function {teVarArg = False, teRetTy = Int {teWidth = 32}, teParamTy = [Int {teWidth = 32},Ptr {teAddressSpace = 0, tePointeeTy = Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}}]}, fCallingConv = C, fIsProto = False, fLinkage = External, fParamAttrs = 1, fAlignment = 0, fSection = 0, fVisibility = Default, fGC = 0, fUnnamedAddr = False, fPrologueData = 0, fDLLStorageClass = Default, fComdat = 0, fPrefixData = 0, fPersonalityFn = 0} 39 | -- , Function {fType = Function {teVarArg = True, teRetTy = Int {teWidth = 32}, teParamTy = [Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}]}, fCallingConv = C, fIsProto = True, fLinkage = External, fParamAttrs = 2, fAlignment = 0, fSection = 0, fVisibility = Default, fGC = 0, fUnnamedAddr = False, fPrologueData = 0, fDLLStorageClass = Default, fComdat = 0, fPrefixData = 0, fPersonalityFn = 0} 40 | -- , Constant (Array {teNumElts = 13, teEltTy = Int {teWidth = 8}}) (CString "hello world\n") 41 | -- , Constant (Int {teWidth = 32}) (Int 2) 42 | -- , Constant (Int {teWidth = 32}) (Int 4) 43 | -- ] 44 | -- , mDecls = [ Function {fType = Function {teVarArg = True, teRetTy = Int {teWidth = 32}, teParamTy = [Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}]}, fCallingConv = C, fIsProto = True, fLinkage = External, fParamAttrs = 2, fAlignment = 0, fSection = 0, fVisibility = Default, fGC = 0, fUnnamedAddr = False, fPrologueData = 0, fDLLStorageClass = Default, fComdat = 0, fPrefixData = 0, fPersonalityFn = 0} ] 45 | -- , mFns = [ Function 46 | -- { dSig = Function {fType = Function {teVarArg = False, teRetTy = Int {teWidth = 32}, teParamTy = [Int {teWidth = 32},Ptr {teAddressSpace = 0, tePointeeTy = Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}}]}, fCallingConv = C, fIsProto = False, fLinkage = External, fParamAttrs = 1, fAlignment = 0, fSection = 0, fVisibility = Default, fGC = 0, fUnnamedAddr = False, fPrologueData = 0, fDLLStorageClass = Default, fComdat = 0, fPrefixData = 0, fPersonalityFn = 0} 47 | -- , dConst = [ Constant (Int {teWidth = 32}) Null 48 | -- , Constant (Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}) (InboundsGep [1,2,0,3,8,3,8]) 49 | -- ] 50 | -- , dBody = BasicBlock [ Alloca (Int {teWidth = 32}) (Constant (Int {teWidth = 32}) (Int 2)) 0 51 | -- , Alloca (Ptr {teAddressSpace = 0, tePointeeTy = Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}}) (Constant (Int {teWidth = 32}) (Int 2)) 0 52 | -- , Store (TRef (Int {teWidth = 32}) 0) (Arg (Int {teWidth = 32})) 4 53 | -- , Store (TRef (Ptr {teAddressSpace = 0, tePointeeTy = Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}}) 1) (Arg (Ptr {teAddressSpace = 0, tePointeeTy = Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}})) 8 54 | -- , Call (Function {teVarArg = True, teRetTy = Int {teWidth = 32}, teParamTy = [Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}]}) (Function {fType = Function {teVarArg = True, teRetTy = Int {teWidth = 32}, teParamTy = [Ptr {teAddressSpace = 0, tePointeeTy = Int {teWidth = 8}}]}, fCallingConv = C, fIsProto = True, fLinkage = External, fParamAttrs = 2, fAlignment = 0, fSection = 0, fVisibility = Default, fGC = 0, fUnnamedAddr = False, fPrologueData = 0, fDLLStorageClass = Default, fComdat = 0, fPrefixData = 0, fPersonalityFn = 0}) [Constant (Array {teNumElts = 13, teEltTy = Int {teWidth = 8}}) (CString "hello world\n")] 55 | -- , Ret (Just (Constant (Int {teWidth = 32}) Null))] 56 | -- } 57 | -- ] 58 | -- } 59 | -- ) 60 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Instruction.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Instruction where 3 | 4 | import Data.BitCode.LLVM.Types 5 | 6 | import Data.BitCode.LLVM.Type (Ty) 7 | import Data.BitCode.LLVM.Value (Symbol) 8 | import Data.BitCode.LLVM.Cmp (Predicate) 9 | import Data.BitCode.LLVM.RMWOperations (RMWOperations) 10 | 11 | import Data.BitCode.LLVM.Opcodes.Binary (BinOp) 12 | import Data.BitCode.LLVM.Opcodes.Cast (CastOp) 13 | import Data.BitCode.LLVM.CallingConv (CallingConv) 14 | import Data.BitCode.LLVM.Flags (Flag) 15 | 16 | import Data.BitCode.LLVM.Codes.AtomicOrdering (AtomicOrdering) 17 | import Data.BitCode.LLVM.Codes.SynchronizationScope (AtomicSynchScope) 18 | 19 | import GHC.Generics (Generic) 20 | import Data.Binary (Binary) 21 | import Data.Word (Word64) 22 | 23 | data TailCallKind = None | Tail | MustTail | NoTail deriving (Eq, Show, Generic) 24 | 25 | instance Binary TailCallKind 26 | 27 | data Inst 28 | -- | Ty and Value type should match up. If Ty is Metadata, then the Value is takes from the Metadata List 29 | -- else from the value list. 30 | = BinOp Ty BinOp Symbol Symbol [Flag] 31 | | Cast Ty CastOp Symbol 32 | | Alloca Ty Symbol Align -- this would produce a typed ref. Ref, Alignment of 0 means, the backend can choose an appropriate alignment. 33 | -- | Load instruction 34 | | Load Ty Symbol Align 35 | -- | Store instruction. Store the Value in the Typed Ref. 36 | | Store Symbol Symbol Align 37 | -- | Call instruction. (Ty :: Ty, Fn :: Symbol, args :: [Symbol]) 38 | | Call { cTy :: Ty -- ^ result type 39 | , cTCK :: TailCallKind -- ^ tail call; should default to None 40 | , cCC :: CallingConv -- ^ the calling convention 41 | , cSym :: Symbol -- ^ function or reference 42 | -- TODO: if we make the function signature optional (Maybe Ty), could that produce an implicity type? 43 | , cSig :: Ty -- ^ function signature -- e.g. this should be equivalent to @lower . ty@ 44 | , cArgs:: [Symbol] -- ^ arguments 45 | } 46 | -- | Compare 47 | | Cmp2 Ty Symbol Symbol Predicate 48 | -- | GEP 49 | | Gep 50 | Ty -- ^ base type 51 | Bool -- ^ inbounds 52 | Symbol -- ^ Value indexed into 53 | [Symbol] -- ^ indices. 54 | -- | Extract value 55 | | ExtractValue 56 | Symbol -- ^ Value indexed into 57 | [Word64] -- ^ indices. 58 | -- | Return Terminator 59 | | Ret (Maybe Symbol) 60 | -- | Unconditional branch 61 | | UBr BasicBlockId 62 | -- | Conditional branch 63 | | Br Symbol BasicBlockId BasicBlockId 64 | -- | Switch 65 | | Switch Symbol BasicBlockId [(Symbol, BasicBlockId)] 66 | -- | Atomic ops 67 | | Fence AtomicOrdering AtomicSynchScope 68 | | CmpXchg Symbol Symbol Symbol AtomicOrdering AtomicSynchScope {- failure ord: -} AtomicOrdering 69 | | AtomicRMW Symbol Symbol RMWOperations {- volatility -} AtomicOrdering AtomicSynchScope 70 | | AtomicStore Symbol Symbol Align {- volatility -} AtomicOrdering AtomicSynchScope 71 | | AtomicLoad Ty Symbol Align {- volatility -} AtomicOrdering AtomicSynchScope 72 | deriving (Show, Eq, Generic) 73 | 74 | -- instance Binary Inst 75 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Linkage.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Linkage where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- see @include/llvm/IR/GlobalValue.h@ 8 | data Linkage 9 | -- | Externally visible function 10 | = External -- 0 11 | -- Obsolete 12 | | Obsolete_Implicit_Comdat -- 1 13 | -- | Special purpose, only applies to global arrays 14 | | Appending -- 2 15 | -- | Rename collisions when linking (static functions). 16 | | Internal -- 3 17 | -- Obsolete 18 | | Obsolete_ImplicitComdat2 -- 4 19 | -- Obsolete DLLImportLinkage 20 | | Obsolete_DLLImportLinkage -- 5 21 | -- Obsolete DLLExportLinkage 22 | | Obsolete_DLLExportLinkage -- 6 23 | -- | ExternalWeak linkage description. 24 | | ExternalWeak -- 7 25 | -- | Tentative definitions. 26 | | Common -- 8 27 | -- | Like Internal, but omit from symbol table. 28 | | Private -- 9 29 | -- Obsolete 30 | | Obsolete_Implicit_Comdat3 -- 10 31 | -- Obsolete 32 | | Obsolete_Implicit_Comdat4 -- 11 33 | -- | Available for inspection, not emission. 34 | | AvailableExternally -- 12 35 | -- Obsolete 36 | | Obsolete_LinkerPrivateLinkage -- 13 37 | -- Obsolete 38 | | Obsolete_LinkerPrivateWeakLinkage -- 14 39 | -- Obsolete 40 | | Obsolete_LinkOnceODRAutoHideLinkage -- 15 41 | -- | Keep one copy of named function when linking (weak) 42 | | WeakAny -- 16 43 | -- | Same, but only replaced by something equivalent. 44 | | WeakODR -- 17 45 | -- | Keep one copy of function when linking (inline) 46 | | LinkOnceAny -- 18 47 | -- | Same, but only replaced by something equivalent. 48 | | LinkOnceODR -- 19 49 | deriving (Enum, Eq, Ord, Show, Generic) 50 | 51 | instance Binary Linkage 52 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Metadata.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Metadata where 2 | 3 | import Data.Word (Word64) 4 | 5 | import Data.BitCode.LLVM.Type (Ty) 6 | 7 | data Metadata 8 | = MDString String 9 | | MDValue Ty Word64 10 | | MDNode [Metadata] 11 | | MDName String 12 | | MDDistinctNode [Metadata] 13 | -- | MDKind this is handled as (Int, String) map. 14 | | MDLocation { mdLocDistinct :: Bool, mdLocLine :: Word64, mdLocCol :: Word64, mdLocScope :: Word64, mdLocInlinedAt :: Word64 } 15 | | MDNamedNode String [Metadata] -- Nodes. 16 | -- TODO: Many more, see @Codes/Metadata.hs@ 17 | deriving (Show) 18 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Opcodes/Binary.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Opcodes.Binary where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- | BinaryOpcodes - These are values used in the bitcode files to encode which 8 | -- binop a CST_CODE_CE_BINOP or a XXX refers to. The values of these enums 9 | -- have no fixed relation to the LLVM IR enum values. Changing these will 10 | -- break compatibility with old files. 11 | data BinOp 12 | = ADD -- 0 13 | | SUB -- 1 14 | | MUL -- 2 15 | | UDIV -- 3 16 | -- | overloaded for FP 17 | | SDIV -- 4 18 | | UREM -- 5 19 | -- | overloaded for FP 20 | | SREM -- 6 21 | | SHL -- 7 22 | | LSHR -- 8 23 | | ASHR -- 9 24 | | AND -- 10 25 | | OR -- 11 26 | | XOR -- 12 27 | deriving (Show, Enum, Eq, Ord, Generic) 28 | 29 | instance Binary BinOp 30 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Opcodes/Cast.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Opcodes.Cast where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- | CastOpcodes - These are values used in the bitcode files to encode which 8 | -- cast a CST_CODE_CE_CAST or a XXX refers to. The values of these enums 9 | -- have no fixed relation to the LLVM IR enum values. Changing these will 10 | -- break compatibility with old files. 11 | data CastOp 12 | = TRUNC -- 0 13 | | ZEXT -- 1 14 | | SEXT -- 2 15 | | FPTOUI -- 3 16 | | FPTOSI -- 4 17 | | UITOFP -- 5 18 | | SITOFP -- 6 19 | | FPTRUNC -- 7 20 | | FPEXT -- 8 21 | | PTRTOINT -- 9 22 | | INTTOPTR -- 10 23 | | BITCAST -- 11 24 | | ADDRSPACECAST -- 12 25 | deriving (Show, Enum, Eq, Ord, Generic) 26 | 27 | instance Binary CastOp 28 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/ParamAttr.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.ParamAttr where 2 | 3 | import Data.BitCode.LLVM.Codes.AttributeKind (AttributeKind) 4 | 5 | import Data.Word (Word64) 6 | 7 | 8 | data ParamAttrGroupIdx 9 | = Ret 10 | | Fun 11 | | Param Word64 12 | deriving (Show) 13 | 14 | data ParamAttrEntry 15 | = Kind AttributeKind 16 | | Align Word64 17 | | StackAlign Word64 18 | | Pair String (Maybe String) 19 | deriving (Show) 20 | 21 | data ParamAttrGroupEntry = GroupEntry { pgIdx :: ParamAttrGroupIdx 22 | , pgAttrs :: [ParamAttrEntry] 23 | } deriving Show 24 | 25 | type Idx = Int 26 | type GroupIdx = Idx 27 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Pretty.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances #-} 2 | {-# LANGUAGE IncoherentInstances #-} 3 | {-# LANGUAGE TypeSynonymInstances #-} 4 | {-# LANGUAGE FlexibleContexts #-} 5 | {-# LANGUAGE RecordWildCards #-} 6 | module Data.BitCode.LLVM.Pretty where 7 | 8 | import Prelude hiding ((<>)) 9 | 10 | import Data.Word (Word64) 11 | import Data.Map.Strict (Map) 12 | import qualified Data.Map.Strict as Map 13 | import qualified Data.Set as Set 14 | import Data.Maybe (fromMaybe) 15 | import Data.Char (toLower) 16 | import Text.PrettyPrint 17 | import Data.BitCode.LLVM 18 | import Data.BitCode.LLVM.Value as V 19 | import Data.BitCode.LLVM.Type as T 20 | import Data.BitCode.LLVM.Function as F 21 | import Data.BitCode.LLVM.Instruction as I 22 | import Data.BitCode.LLVM.CallingConv as C 23 | import Data.BitCode.LLVM.Codes.AtomicOrdering (AtomicOrdering) 24 | import Data.BitCode.LLVM.Codes.SynchronizationScope (AtomicSynchScope) 25 | -- ----------------------------------------------------------------------------- 26 | -- Pretty class to simplify things a little 27 | class Pretty a where 28 | pretty :: a -> Doc 29 | 30 | instance (Pretty a) => Pretty [a] where 31 | pretty = hsep . punctuate comma . map pretty 32 | 33 | instance (Pretty a, Pretty b) => Pretty [(a, b)] where 34 | pretty = vcat . map (\(x,y) -> pretty x <+> text "->" <+> pretty y) 35 | 36 | instance Pretty Int where 37 | pretty = int 38 | 39 | -- * Base types 40 | instance {-# OVERLAPS #-} Pretty String where pretty = text 41 | instance Pretty Word64 where pretty = text . show 42 | 43 | instance (Pretty a) => Pretty (Maybe a) where 44 | pretty (Just x) = pretty x 45 | pretty Nothing = empty 46 | 47 | instance (Pretty a, Pretty b) => Pretty (Map a b) where 48 | pretty = pretty . Map.toList 49 | 50 | prefix :: Value -> Doc 51 | prefix (Global{}) = char '@' 52 | prefix (V.Function{..}) | feProto fExtra = text "decl " 53 | | otherwise = text "def " 54 | prefix (Alias{..}) = char '~' 55 | prefix (Constant{}) = text "const " 56 | prefix (Arg{}) = text "arg " 57 | prefix (Value{}) = text "val " 58 | prefix (TRef{}) = text "ref " 59 | prefix (FwdRef i) = text "fwdRef" <+> int (fromIntegral i) 60 | 61 | suffix :: Value -> Doc 62 | suffix (Global{..}) = parens (pretty gInit) <+> text "::" <+> pretty gPointerType 63 | suffix (V.Function{..}) = text "::" <+> pretty fType 64 | suffix (Alias{..}) = empty 65 | suffix (Constant t c) = pretty c <+> text "::" <+> pretty t 66 | suffix (Arg t r) = int r <+> text "::" <+> pretty t 67 | suffix (Value t) = text "::" <+> pretty t 68 | suffix (TRef t r) = int r <+> text "::" <+> pretty t 69 | suffix (FwdRef i) = empty 70 | 71 | -- * Values 72 | instance Pretty Value where 73 | pretty v@(V.Function{..}) | Just prefixData <- fePrefixData fExtra = prefix v <> suffix v $+$ text "Prefix:" <+> pretty prefixData 74 | pretty v = prefix v <> suffix v 75 | 76 | -- * Symbols 77 | instance Pretty Symbol where 78 | pretty (Unnamed _ _ v) = pretty v 79 | pretty (Named n _ _ v) = prefix v <> text n <+> suffix v 80 | pretty (Lazy n _ _) = text "lazy" <+> char '@' <> text n 81 | 82 | -- * Types 83 | instance Pretty Ty where 84 | pretty Void = text "()" 85 | pretty T.Float = text "float" 86 | pretty Double = text "double" 87 | pretty (Opaque n) = text "opaque" <+> text n 88 | pretty (T.Int w) = char 'i' <> pretty w 89 | pretty (Ptr _ f@(T.Function{})) = parens (pretty f) <> char '*' 90 | pretty (Ptr _ t) = pretty t <> char '*' 91 | pretty (T.Array n t) = brackets $ pretty n <+> char 'x' <+> pretty t 92 | pretty (T.Vector n t) = char '<' <> pretty n <+> char 'x' <+> pretty t <> char '>' 93 | pretty (T.Function False retTy pTy) = parens (hsep (punctuate comma (map pretty pTy))) <+> text "->" <+> pretty retTy 94 | pretty (T.Function True retTy pTy) = parens (hsep (punctuate comma ((map pretty pTy) ++ [text "..."]))) <+> text "->" <+> pretty retTy 95 | -- use braces for structs. 96 | pretty (T.StructNamed n _ els) = text "struct" <+> text n <+> braces (hsep (punctuate comma (map pretty els))) 97 | pretty (T.StructAnon _ els) = text "struct" <+> braces (hsep (punctuate comma (map pretty els))) 98 | pretty x = error $ "Can not pretty print Ty: " ++ show x 99 | 100 | -- * Constants 101 | instance Pretty Const where 102 | pretty Null = text "null" 103 | pretty Undef = text "undef" 104 | pretty (V.Int n) = int n 105 | pretty (V.WideInt ns) = brackets $ hsep (map int ns) 106 | pretty (V.Float f) = text (show f) 107 | pretty (V.String s) = doubleQuotes $ text (escape s) 108 | pretty (V.CString s) = doubleQuotes $ text (escape s) <> text "\\0" 109 | pretty (V.InboundsGep t idxs) = parens $ text "getElemenentPointer inbounds" <+> parens (hsep . punctuate (text " !!") $ map pretty idxs) <+> text "::" <+> pretty t 110 | pretty (V.Struct vals) = text "struct" <+> braces (hsep (punctuate comma (map pretty vals))) 111 | pretty (V.Cast t op v) = text "cast" <+> text (map toLower (show op)) <+> pretty v <+> text "to" <+> pretty t 112 | pretty (V.BinOp op lhs rhs) = parens (pretty lhs) <+> text (map toLower $ show op) <+> parens (pretty rhs) 113 | pretty (V.Array vals) = brackets (hsep (punctuate comma (map pretty vals))) 114 | pretty x = error $ "Can not pretty print Const: " ++ show x 115 | 116 | escape :: String -> String 117 | escape [] = [] 118 | escape (h:t) | h == '\n' = '\\':'n':escape t 119 | | otherwise = h:escape t 120 | 121 | prettyIndexed :: Pretty a => [a] -> [Doc] 122 | prettyIndexed = map pretty' . zip [0..] 123 | where pretty' (n, p) = int n $$ nest 4 (colon <+> pretty p) 124 | 125 | prettyWithIndex :: [Symbol] -> [Doc] 126 | prettyWithIndex = map pretty' 127 | where pretty' :: Symbol -> Doc 128 | pretty' s@(Unnamed (Indexed _ i) _t _v) = int (fromIntegral (i 0)) $$ nest 4 (colon <+> pretty s) 129 | pretty' s@(Named _ (Indexed _ i) _t _v) = int (fromIntegral (i 0)) $$ nest 4 (colon <+> pretty s) 130 | pretty' s@(Lazy _ _ _) = text "lazy" $$ nest 4 (colon <+> pretty s) 131 | 132 | -- * Functions (Basic Blocks) 133 | instance Pretty F.Function where 134 | pretty (F.Function{..}) = pretty dSig 135 | $$ text "Constants" <+> parens (int (length dConst)) $$ nest 3 (vcat (prettyIndexed dConst)) 136 | $+$ text "Blocks" <+> parens (int (length dBody)) $$ nest 3 (vcat (prettyIndexed dBody)) 137 | 138 | instance Pretty BasicBlock where 139 | pretty (BasicBlock insts) = vcat (map pretty insts) 140 | 141 | instance Pretty (Maybe Symbol, Inst) where 142 | pretty (Just (Unnamed _ _ (TRef t r)), inst) = text "ref" <+> int r <+> text "<-" <+> pretty inst 143 | pretty (Just (Named n _ _ (TRef t r)), inst) = text n <+> parens (text "ref" <+> int r) <+> text "<-" <+> pretty inst 144 | pretty (Nothing, inst) = pretty inst 145 | 146 | instance Pretty CallingConv where 147 | pretty c = text (map toLower (show c)) <> text "call" 148 | 149 | instance Pretty TailCallKind where 150 | pretty None = empty 151 | pretty Tail = text "tail" 152 | pretty MustTail = text "must tail" 153 | pretty NoTail = text "no tail" 154 | 155 | instance Pretty Inst where 156 | pretty (I.BinOp t o l r fs) = parens (pretty l) <+> text (map toLower $ show o) <+> vcat (map (text . map toLower . show) fs) <+> parens (pretty r) <+> text "::" <+> pretty t 157 | pretty (I.Cast t op v) = text "cast" <+> text (map toLower (show op)) <+> parens (pretty v) <+> text "::" <+> pretty t 158 | pretty (Alloca t v _) = text "alloca" <+> parens (pretty v) <+> text "::" <+> pretty t 159 | pretty (Load t v _) = text "load" <+> parens (pretty v) <+> text "::" <+> pretty t 160 | pretty (Store v r _) = pretty r <+> text "->" <+> pretty v 161 | pretty (Call t tck cc s fty args) 162 | | f'@(V.Function{}) <- symbolValue s = pretty tck <+> pretty cc <+> fromMaybe (char 'f') (text <$> symbolName s) <> parens (hsep . punctuate comma $ map pretty args) <+> text "::" <+> pretty t 163 | | r'@(V.TRef{}) <- symbolValue s = pretty tck <+> pretty cc <+> pretty r' <> parens (hsep . punctuate comma $ map pretty args) <+> text "::" <+> pretty t 164 | | otherwise = text "WARN: Call without function or ref symbol not yet supported; are you sure you want this?" 165 | pretty (Cmp2 t l r p) = parens (pretty l) <+> text (show p) <+> parens (pretty r) <+> text "::" <+> pretty t 166 | pretty (I.Gep t ib v idxs) = text "getElementPointer" <+> (if ib then text "inbounds" else empty) <+> pretty v <+> text "!!" <+> (hsep . punctuate (text " !!") $ map pretty idxs) 167 | pretty (ExtractValue v idxs) = text "extract value" <+> pretty v <+> text "!!" <+> hcat (map (text . show) idxs) 168 | pretty (Ret v) = text "ret" <+> parens (pretty v) 169 | pretty (UBr bbId) = text "br" <+> pretty bbId 170 | pretty (Br on bbId bbId') = text "br" <+> parens (pretty on) <+> pretty bbId <+> pretty bbId' 171 | pretty (Switch on defBbId cases) = text "case" <+> parens (pretty on) <+> text "of" 172 | $+$ nest 2 (vcat (map (\(val,bbId) -> pretty val <+> text "->" <+> pretty bbId) cases) $+$ text "_ ->" <+> pretty defBbId) 173 | pretty (Fence ord scope) = text "fence" <+> pretty ord <+> pretty scope 174 | pretty (CmpXchg dst cmp new ord scope ford) = text "cmpxchg" 175 | pretty (AtomicRMW dst val _op _ord _scope) = text "atomicRMW" <+> pretty dst <+> pretty val 176 | pretty (AtomicStore v r _ ord scope) = text "atomic" <+> pretty r <+> text "->" <+> pretty v 177 | pretty (AtomicLoad t v _ ord scope) = text "atomic load" <+> parens (pretty v) <+> text "::" <+> pretty t 178 | -- ----------------------------------------------------------------------------- 179 | -- Identification 180 | -- 181 | instance Pretty Ident where 182 | pretty (Ident s e) = text "Ident" 183 | $$ nest 3 (text s <+> int (fromEnum e)) 184 | 185 | -- ----------------------------------------------------------------------------- 186 | -- Atomic Ordering 187 | -- 188 | instance Pretty AtomicOrdering where 189 | pretty = text . map toLower . drop 9 . show 190 | 191 | instance Pretty AtomicSynchScope where 192 | pretty = text . map toLower . show 193 | -- ----------------------------------------------------------------------------- 194 | -- Module 195 | -- 196 | instance Pretty Module where 197 | pretty (Module{..}) = text "Module" 198 | $$ nest 3 ( 199 | text "Version" $$ nest 12 (pretty mVersion) 200 | $+$ text "Triple" $$ nest 12 (pretty mTriple) 201 | $+$ text "Datalayout" $$ nest 12 (pretty mDatalayout) 202 | $+$ text "Constants" <+> parens (int (length mConsts)) $$ nest 3 (vcat (prettyWithIndex mConsts)) 203 | $+$ text "Globals" <+> parens (int (length mValues)) $$ nest 3 (vcat (prettyWithIndex mValues)) 204 | $+$ text "Fn Decls" <+> parens (int (length mDecls)) $$ nest 3 (vcat (prettyWithIndex mDecls)) 205 | $+$ text "Functions" <+> parens (int (length mFns)) $$ nest 3 (vcat (prettyIndexed mFns)) 206 | ) 207 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/RMWOperations.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.RMWOperations where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- | These are values used in the bitcode files to encode AtomicRMW operations. 8 | -- The values of these enums have no fixed relation to the LLVM IR enum 9 | -- values. Changing these will break compatibility with old files. 10 | data RMWOperations 11 | = XCHG -- 0 12 | | ADD -- 1 13 | | SUB -- 2 14 | | AND -- 3 15 | | NAND -- 4 16 | | OR -- 5 17 | | XOR -- 6 18 | | MAX -- 7 19 | | MIN -- 8 20 | | UMAX -- 9 21 | | UMIN -- 10 22 | deriving (Show, Enum, Eq, Ord, Generic) 23 | 24 | instance Binary RMWOperations 25 | 26 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Reader/Monad.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Reader.Monad 2 | ( LLVMReader 3 | , Result 4 | , evalLLVMReader, traceEvalLLVMReader 5 | , tellType, askType 6 | , tellValue, askValue, askValue' 7 | , ask, askTypeList, askValueList, tellValueList 8 | 9 | , tellValueSymbol, askValueSymbolTable, tellValueSymbolTable 10 | 11 | , tellMetadata, askMetadata, popMetadata 12 | , tellMetadataKind, askMetadataKind 13 | , askParamattr, tellParamattr 14 | , askParamattrGroup, tellParamattrGroup 15 | , tellIdent, askIdent 16 | 17 | , tellVersion, askVersion 18 | , tellTriple, tellDataLayout 19 | 20 | , purgeValueList 21 | 22 | , nth 23 | 24 | , trace, traceShow 25 | ) 26 | where 27 | 28 | import Control.Exception (assert) 29 | import Debug.Trace (traceStack) 30 | 31 | import Control.Monad (MonadPlus(..), when) 32 | import Control.Applicative (Alternative(..)) 33 | import Data.Maybe (fromMaybe) 34 | import Data.Word (Word64) 35 | 36 | -- | The LLVM IR essentially describes a @Module@ 37 | -- there can only be one. Hence this reader mostly 38 | -- deals with parsing a single module. 39 | 40 | import Data.BitCode.LLVM -- essential data types 41 | import Data.BitCode.LLVM.Type (Ty) 42 | import Data.BitCode.LLVM.Classes.HasType (ty) 43 | import Data.BitCode.LLVM.Value (Value(FwdRef), Const, Named(..), Symbol, ValueSymbolTable, ValueSymbolEntry, symbolType, symbolValue, symbolName, entryName, symbolIndex) 44 | import Data.BitCode.LLVM.Function (Function) 45 | import Data.BitCode.LLVM.Metadata (Metadata) 46 | import Data.BitCode.LLVM.ParamAttr 47 | 48 | import qualified Debug.Trace as DBG 49 | 50 | type Result a = Either String a 51 | 52 | -- | The module environemnt the reader 53 | -- can access and modify. 54 | 55 | data Ctx = Ctx 56 | { 57 | -- * Global Level 58 | identification :: Ident 59 | , version :: Int 60 | , triple :: String 61 | , dataLayout :: String 62 | , paramattrs :: [[GroupIdx]] -- ^ essentially the attributes are a merge of the groups. 63 | , paramattrGroups :: [(GroupIdx, ParamAttrGroupEntry)] 64 | -- * Module Level 65 | , types :: [Ty] 66 | , metadata :: [Metadata] 67 | , namedMetadata :: [(String, Metadata)] 68 | , metadataKinds :: [(Int, String)] 69 | , symbols :: [Symbol] 70 | , valueSymbolTable :: ValueSymbolTable 71 | -- * Debugging 72 | , _trace :: Bool 73 | } deriving Show 74 | 75 | mkCtx :: Ctx 76 | mkCtx = Ctx (error "Ident") (error "Version") (error "Triple") (error "Datalayout") mempty mempty mempty mempty mempty mempty mempty mempty False 77 | 78 | -- | A strict pair. 79 | data PairS a = PairS { result :: !(Result a) 80 | , ctx :: !Ctx 81 | } deriving Show 82 | 83 | newtype LLVMReader a = LLVM { runLLVMReader :: Ctx -> PairS a } 84 | 85 | evalLLVMReader :: LLVMReader a -> Result a 86 | evalLLVMReader = result . flip runLLVMReader mkCtx 87 | 88 | traceEvalLLVMReader :: LLVMReader a -> Result a 89 | traceEvalLLVMReader = result . flip runLLVMReader mkCtx { _trace = True } 90 | 91 | -- * Functor 92 | instance Functor LLVMReader where 93 | fmap f m = LLVM $ \c -> let PairS a c' = runLLVMReader m c 94 | in PairS (f <$> a) c' 95 | 96 | -- * Applicative 97 | -- TODO: Verify `ap` is correct. Instinctively the two 98 | -- runLLVMReader calls should be independent, but 99 | -- the second depends on the ctx of the evaluation 100 | -- of the first. 101 | instance Applicative LLVMReader where 102 | pure a = LLVM $ \c -> PairS (Right a) c 103 | 104 | m <*> n = LLVM $ \c -> 105 | let PairS f c' = runLLVMReader m c 106 | PairS x c'' = runLLVMReader n c' 107 | in PairS (f <*> x) c'' 108 | 109 | -- * Moand 110 | instance Monad LLVMReader where 111 | m >>= n = LLVM $ \c -> case runLLVMReader m c of 112 | PairS (Right a) c' -> runLLVMReader (n a) c' 113 | PairS (Left err) c' -> PairS (Left err) c' 114 | 115 | fail s = LLVM $ \c -> PairS (Left s) c 116 | 117 | -- * Monad Plus 118 | instance MonadPlus LLVMReader where 119 | mzero = LLVM $ \c -> PairS (Left "") c 120 | m `mplus` n = LLVM $ \c -> let PairS _ c' = runLLVMReader m c 121 | in runLLVMReader n c' 122 | 123 | instance Alternative LLVMReader where 124 | empty = mzero 125 | m <|> n = LLVM $ \c -> case runLLVMReader m c of 126 | PairS (Left _) _ -> runLLVMReader n c 127 | res -> res 128 | whenM :: (Monad m) => m Bool -> m () -> m () 129 | whenM cond f = cond >>= flip when f 130 | 131 | trace :: String -> LLVMReader () 132 | trace = whenM (_trace <$> ask) . DBG.traceM 133 | 134 | traceShow :: (Show a) => a -> LLVMReader b -> LLVMReader b 135 | traceShow x a = trace (show x) >> a 136 | 137 | ask :: LLVMReader Ctx 138 | ask = LLVM $ \c -> PairS (pure c) c 139 | 140 | modify :: (Ctx -> Ctx) -> LLVMReader () 141 | modify f = LLVM $ \c -> PairS (pure ()) (f c) 142 | 143 | nth :: (Integral a) => a -> [b] -> Maybe b 144 | nth n xs | n < 0 = Nothing 145 | | otherwise = case take 1 (drop (fromIntegral n) xs) of 146 | [x] -> Just x 147 | [] -> Nothing 148 | 149 | nth' :: (Integral a) => a -> [b] -> LLVMReader b 150 | nth' n xs = assert (fromIntegral n < length xs) $ fromMaybe (error "Index out of range") . fmap pure . nth n $ xs 151 | 152 | tellType :: Ty -> LLVMReader () 153 | tellType t = modify $ \c -> (c { types = types c ++ [t] }) 154 | askType :: (Integral a) => a -> LLVMReader Ty 155 | askType n = nth' n =<< types <$> ask 156 | 157 | symbolicate :: (Int, ValueSymbolEntry) -> [Symbol] -> [Symbol] 158 | symbolicate (idx, entry) xs = case nth idx xs of 159 | Just s -> take idx xs ++ [Named (entryName entry) (symbolIndex s) (symbolType s) (symbolValue s)] ++ drop (idx+1) xs 160 | Nothing -> xs 161 | 162 | -- | Adds a Value - Symbol element to the lookup table. 163 | tellValueSymbol :: (Int, ValueSymbolEntry) -> LLVMReader () 164 | tellValueSymbol vs = modify $ \c -> c { valueSymbolTable = vs:valueSymbolTable c, symbols = symbolicate vs (symbols c) } 165 | 166 | -- | Return the currently known VST 167 | askValueSymbolTable :: LLVMReader ValueSymbolTable 168 | askValueSymbolTable = valueSymbolTable <$> ask 169 | 170 | -- | Set the VST. 171 | -- Caveat, as values are resolved when the table is set or 172 | -- the value is added, new symbols will only apply to these 173 | -- values. If you asked for a Value prior to setting the VST 174 | -- that value will either have an invalid symbol or no symbol 175 | -- at all. 176 | -- 177 | -- TODO: Use some true Reference style solution or return 178 | -- LLVMReader Value functions for Values that lookup 179 | -- the symbol + value on call. 180 | tellValueSymbolTable :: ValueSymbolTable -> LLVMReader () 181 | tellValueSymbolTable vst = modify $ \c -> c { valueSymbolTable = vst, symbols = foldr symbolicate (symbols c) vst } 182 | 183 | tellValue :: Value -> LLVMReader () 184 | tellValue v = modify $ \c -> let 185 | n = length (symbols c) 186 | s = case lookup n (valueSymbolTable c) of 187 | Just e -> Named (entryName e) undefined (ty v) v 188 | Nothing -> Unnamed undefined (ty v) v 189 | in c { symbols = symbols c ++ [s] } 190 | 191 | askValue :: (Integral a) => Ty -> a -> LLVMReader Symbol 192 | askValue t n = fromMaybe (mkFwdRef t (fromIntegral n)) . nth n . symbols <$> ask 193 | where mkFwdRef :: Ty -> Word64 -> Symbol 194 | mkFwdRef t n = Unnamed undefined t (FwdRef n) 195 | 196 | -- | stricter version of askValue. This will not create 197 | -- a FwdRef if necessary. 198 | askValue' :: (Integral a, Show a) => a -> LLVMReader Symbol 199 | askValue' n = fromMaybe (error $ "unable to find symbol no. " ++ (show n)) 200 | . nth n . symbols <$> ask 201 | 202 | tellMetadata :: Metadata -> LLVMReader () 203 | tellMetadata md = modify $ \c -> c { metadata = metadata c ++ [md] } 204 | askMetadata :: (Integral a) => a -> LLVMReader Metadata 205 | askMetadata n = LLVM $ \c -> PairS (pure $ metadata c !! (fromIntegral n)) c 206 | popMetadata :: LLVMReader Metadata 207 | popMetadata = LLVM $ \c -> let (h:t) = reverse (metadata c) 208 | in PairS (pure h) (c { metadata = reverse t} ) 209 | 210 | tellMetadataKind :: (Int, String) -> LLVMReader () 211 | tellMetadataKind k = LLVM $ \c -> PairS (pure ()) (c { metadataKinds = k:metadataKinds c}) 212 | 213 | askMetadataKind :: (Integral a) => a -> LLVMReader String 214 | askMetadataKind n = LLVM $ \c -> case lookup (fromIntegral n) (metadataKinds c) of 215 | Just v -> PairS (pure v) c 216 | Nothing -> PairS (Left $ "No metadata for id: " ++ show (fromIntegral n)) c 217 | 218 | tellParamattr :: [GroupIdx] -> LLVMReader () 219 | tellParamattr gidx = LLVM $ \c -> PairS (pure ()) (c { paramattrs = paramattrs c ++ [gidx] }) 220 | askParamattr :: (Integral a) => a -> LLVMReader [GroupIdx] 221 | askParamattr n = (\c -> (paramattrs c !! (fromIntegral n))) <$> ask 222 | 223 | tellParamattrGroup :: (GroupIdx, ParamAttrGroupEntry) -> LLVMReader () 224 | tellParamattrGroup g = LLVM $ \c -> PairS (pure ()) (c { paramattrGroups = g:paramattrGroups c }) 225 | 226 | askParamattrGroup :: GroupIdx -> LLVMReader ParamAttrGroupEntry 227 | askParamattrGroup idx = LLVM $ \c -> case lookup idx (paramattrGroups c) of 228 | Just v -> PairS (pure v) c 229 | Nothing -> PairS (Left $ "No paramattr group for idx: " ++ show idx) c 230 | 231 | askTypeList :: LLVMReader [Ty] 232 | askTypeList = types <$> ask 233 | 234 | askValueList :: LLVMReader [Symbol] 235 | askValueList = symbols <$> ask 236 | 237 | tellIdent :: Ident -> LLVMReader () 238 | tellIdent i = modify $ \c -> c { identification = i } 239 | 240 | askIdent :: LLVMReader Ident 241 | askIdent = identification <$> ask 242 | 243 | tellVersion :: (Integral a) => a -> LLVMReader () 244 | tellVersion v = modify $ \c -> c { version = fromIntegral v } 245 | 246 | askVersion :: LLVMReader Int 247 | askVersion = version <$> ask 248 | 249 | tellTriple :: String -> LLVMReader () 250 | tellTriple s = modify $ \c -> c { triple = s } 251 | 252 | tellDataLayout :: String -> LLVMReader () 253 | tellDataLayout s = modify $ \c -> c { dataLayout = s } 254 | 255 | purgeValueList :: (Integral a) => a -> LLVMReader () 256 | purgeValueList n = modify $ \c -> c { symbols = take (fromIntegral n) (symbols c) } 257 | 258 | tellValueList :: [Symbol] -> LLVMReader () 259 | tellValueList ss = modify $ \c -> c { symbols = ss } 260 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/StorageClass.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.StorageClass where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- see @include/llvm/IR/GlobalValue.h@ 8 | -- | Storage classes of global values for PE targets. 9 | data DLLStorageClass 10 | = Default -- 0 11 | -- | Function to be imported from DLL 12 | | DLLImport -- 1 13 | -- | Function to be accessible from DLL. 14 | | DLLExport -- 2 15 | deriving (Eq, Enum, Ord, Show, Generic) 16 | 17 | instance Binary DLLStorageClass 18 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/ThreadLocalMode.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | 3 | module Data.BitCode.LLVM.ThreadLocalMode where 4 | 5 | import GHC.Generics (Generic) 6 | import Data.Binary (Binary) 7 | 8 | -- see @include/llvm/IR/GlobalValue.h@ 9 | data ThreadLocalMode 10 | = NotThreadLocal -- 0 11 | | GeneralDynamicTLSModel 12 | | LocalDynamicTLSModel 13 | | InitialExecTLSModel 14 | | LocalExecTLSModel 15 | deriving (Eq, Enum, Ord, Show, Generic) 16 | 17 | instance Binary ThreadLocalMode 18 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/ToBitCode.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fprof-auto #-} 2 | {-# LANGUAGE FlexibleInstances #-} 3 | {-# LANGUAGE RecordWildCards #-} 4 | module Data.BitCode.LLVM.ToBitCode where 5 | 6 | import Data.BitCode (NBitCode, mkBlock, mkRec, mkEmptyRec, bitWidth) 7 | import Data.BitCode.LLVM 8 | import Data.BitCode.LLVM.Util 9 | import Data.BitCode.LLVM.Function 10 | import Data.BitCode.LLVM.Classes.HasType 11 | import qualified Data.BitCode.LLVM.Value as V (Const(..), IndexType(..), Value(..), FpValue(..), Named(..), Symbol, symbolValue, FunctionExtra(..), mkUnnamed, symbolIndexType, symbolIndexValue, symbolIndex) 12 | import qualified Data.BitCode.LLVM.Type as T (Ty(..), ftypes, typeCompare) 13 | import qualified Data.BitCode.LLVM.Instruction as I (Inst(..), TailCallKind(..)) 14 | import Data.BitCode.LLVM.Flags 15 | 16 | import Data.BitCode.LLVM.IDs.Blocks 17 | import Data.BitCode.LLVM.RMWOperations 18 | import qualified Data.BitCode.LLVM.Codes.Identification as IC 19 | import qualified Data.BitCode.LLVM.Codes.Module as MC 20 | import qualified Data.BitCode.LLVM.Codes.Type as TC 21 | import qualified Data.BitCode.LLVM.Codes.Constants as CC 22 | import qualified Data.BitCode.LLVM.Codes.Function as FC 23 | import qualified Data.BitCode.LLVM.Codes.ValueSymtab as VST 24 | import qualified Data.Map.Strict as Map 25 | import Data.Map.Strict (Map) 26 | import qualified Data.Set as Set 27 | 28 | import Data.BitCode.LLVM.Classes.ToSymbols 29 | import Data.List (elemIndex, sort, sortBy, groupBy, nub) 30 | import Data.Function (on) 31 | 32 | import Data.Maybe (fromMaybe, catMaybes) 33 | import Data.Word (Word64) 34 | 35 | import Data.Foldable (foldl') 36 | 37 | import Data.Bits (FiniteBits, (.|.), shift, setBit) 38 | 39 | import Debug.Trace 40 | 41 | import GHC.Stack (HasCallStack) 42 | 43 | -- to compute the length of emitted bitcode. 44 | import Data.BitCode (denormalize) 45 | import Data.BitCode.Writer (emitTopLevel) 46 | 47 | import Data.BitCode.LLVM.Pretty hiding (prettyIndexed) 48 | import Data.BitCode.LLVM.Util 49 | import Text.PrettyPrint ((<+>), text, (<>), int, vcat, Doc, ($+$), empty) 50 | import Control.Applicative ((<|>)) 51 | -------------------------------------------------------------------------------- 52 | -- Turn things into NBitCode. 53 | -- 54 | class ToNBitCode a where 55 | toBitCode :: HasCallStack => a -> [NBitCode] 56 | 57 | -- | Rerturns the Identification Block 58 | instance ToNBitCode Ident where 59 | toBitCode (Ident name epoch) 60 | = pure $ mkBlock IDENTIFICATION [ mkRec IC.STRING name 61 | , mkRec IC.EPOCH epoch 62 | ] 63 | 64 | instance (ToNBitCode a) => ToNBitCode (Maybe a) where 65 | toBitCode (Just a) = toBitCode a 66 | toBitCode Nothing = [] 67 | 68 | instance (ToNBitCode a) => ToNBitCode [a] where 69 | toBitCode = concatMap toBitCode 70 | 71 | instance {-# OVERLAPPING #-} ToNBitCode [T.Ty] where 72 | toBitCode tys 73 | = pure $ mkBlock TYPE_NEW (numEntryRec:concatMap mkTypeRec (zip [0..] tys)) 74 | where -- A "safe" type lookup, that ensure we do not forward reference, which 75 | -- is only permissable for named structs. 76 | typeMap = Map.fromList (zip tys [0..]) :: Map T.Ty Word64 77 | typeList = vcat (map p (zip [(0::Int)..] tys)) 78 | where p (i, t) = pretty i <+> text ": " <+> pretty t <+> text (show t) 79 | lookupTypeIndex' :: HasCallStack => Word64 -> T.Ty -> Maybe Word64 80 | lookupTypeIndex' n t = let t' = lookupTypeIndex typeMap t 81 | in if t' < n then Just t' else Nothing 82 | numEntryRec :: NBitCode 83 | numEntryRec = mkRec TC.NUMENTRY (length tys) 84 | mkTypeRec :: (Word64, T.Ty) -> [NBitCode] 85 | mkTypeRec (i, T.Void) = [ mkEmptyRec TC.VOID ] 86 | mkTypeRec (i, T.Float) = [ mkEmptyRec TC.FLOAT ] 87 | mkTypeRec (i, T.Double) = [ mkEmptyRec TC.DOUBLE ] 88 | mkTypeRec (i, T.Label) = [ mkEmptyRec TC.LABEL ] 89 | mkTypeRec (i, (T.Opaque name)) = [ mkRec TC.STRUCT_NAME name, mkEmptyRec TC.OPAQUE ] 90 | mkTypeRec (i, (T.Int w)) = [ mkRec TC.INTEGER [w] ] 91 | mkTypeRec (i, ty@(T.Ptr s t)) 92 | | Just t' <- lookupTypeIndex' i t = [ mkRec TC.POINTER [t', s] ] 93 | | otherwise = error $ "Pointee " ++ show t ++ " must be emitted before " ++ show ty 94 | mkTypeRec (i, T.Half) = [ mkEmptyRec TC.HALF ] 95 | mkTypeRec (i, ty@(T.Array n t)) 96 | | Just t' <- lookupTypeIndex' i t = [ mkRec TC.ARRAY [n, t'] ] 97 | | otherwise = error $ "Array " ++ show ty ++ " must not forward reference " ++ show t 98 | mkTypeRec (i, ty@(T.Vector n t)) 99 | | Just t' <- lookupTypeIndex' i t = [ mkRec TC.VECTOR [n, t'] ] 100 | | otherwise = error $ "Vector " ++ show ty ++ " must not forward reference " ++ show t 101 | mkTypeRec (i, T.X86Fp80) = [ mkEmptyRec TC.X86_FP80 ] 102 | mkTypeRec (i, T.Fp128) = [ mkEmptyRec TC.FP128 ] 103 | mkTypeRec (i, T.Metadata) = [ mkEmptyRec TC.METADATA ] 104 | mkTypeRec (i, T.X86Mmx) = [ mkEmptyRec TC.X86_MMX ] 105 | mkTypeRec (i, ty@(T.StructAnon p ts)) 106 | | Just ts' <- mapM (lookupTypeIndex' i) ts = [ mkRec TC.STRUCT_ANON ((if p then 1 else 0):ts') ] 107 | | otherwise = error $ "Anon Struct " ++ show (pretty ty) ++ " must not forward reference its types " ++ show (pretty ts) 108 | mkTypeRec (i, (T.StructNamed name p ts))= [ mkRec TC.STRUCT_NAME name, mkRec TC.STRUCT_NAMED ((if p then 1 else 0):map (lookupTypeIndex typeMap) ts) ] 109 | 110 | mkTypeRec (i, ty@(T.Function vargs t ts)) 111 | | Just ts' <- mapM (lookupTypeIndex' i) (t:ts) = [ mkRec TC.FUNCTION ((if vargs then 1 else 0):ts') ] 112 | | otherwise = error . show $ text "Function" <+> pretty ty <+> text "must not forward reference its types" <+> pretty (t:ts) 113 | $+$ text "Types:" $+$ typeList 114 | mkTypeRec (i, T.Token) = [ mkEmptyRec TC.TOKEN ] 115 | 116 | 117 | lookupIndexGeneric :: (HasCallStack, Pretty a, Eq a, Show a, Integral b) => [a] -> a -> b 118 | lookupIndexGeneric xs x = case elemIndex x xs of 119 | Just i -> fromIntegral i 120 | Nothing -> error . show $ text "Unable to find" <+> pretty x <+> text "in" <+> pretty xs 121 | 122 | lookupTypeIndex :: HasCallStack => Map T.Ty Word64 -> T.Ty -> Word64 123 | lookupTypeIndex ts t = case Map.lookup t ts of 124 | Just i -> i 125 | Nothing -> error . show $ text "Unable to find type" <+> pretty t <+> text "in" <+> pretty ts 126 | 127 | lookupValueIndex :: (HasCallStack, Integral b) => [V.Value] -> V.Value -> b 128 | lookupValueIndex vs f@(V.Function{..}) = case (elemIndex f vs) of 129 | Just i -> fromIntegral i 130 | Nothing -> error . show $ text "Unable to find function" <+> pretty f <+> text "in" <+> pretty vs 131 | lookupValueIndex vs v = case elemIndex v vs of 132 | Just i -> fromIntegral i 133 | Nothing -> error . show $ text "Unable to find value" <+> pretty v <+> text "in" <+> pretty vs 134 | 135 | lookupSymbolIndex :: HasCallStack => V.Symbol -> Word64 136 | lookupSymbolIndex = V.symbolIndexValue 137 | 138 | lookupValueIndex' :: HasCallStack => Map V.Value Word64 -> V.Value -> Word64 139 | lookupValueIndex' vs v = case Map.lookup v vs of 140 | Just i -> i 141 | Nothing -> error . show $ text "Unable to find value" <+> pretty v <+> text "in" <+> pretty vs 142 | 143 | -------------------------------------------------------------------------- 144 | -- PP Utiltiys 145 | prettyIndexed :: (Pretty a) => [a] -> Doc 146 | prettyIndexed = pretty . zip ([0..] :: [Int]) 147 | traceShowWith :: Show a => (b -> a) -> b -> b 148 | traceShowWith f x = traceShow (f x) x 149 | 150 | -- Value Type, this is needed to ensure we do not 151 | -- rely on the Named / Unnamed type entry, which 152 | -- for labels might be wrong. But instead really 153 | -- use the type of the value. 154 | vty :: V.Symbol -> T.Ty 155 | vty = ty . V.symbolValue 156 | 157 | -- We *can not* have ToNBitCode Module, as we 158 | -- need to know the position in the bitcode stream. 159 | -- And this includes the Indetification :( 160 | instance ToNBitCode (Maybe Ident, Module) where 161 | toBitCode (i, m@(Module{..})) 162 | = concat [ identBlock, 163 | pure . mkBlock MODULE $ 164 | moduleHeader ++ 165 | -- TODO: To support the OffsetLogic in 3.8 and later 166 | -- we would replace the mkSynTabBlock with the VST Fwd Ref. 167 | -- and put the mkSymTabBlock at the end (behind the mkFunctionBlock's.) 168 | -- We would need to be able to figure out the bitcode positions though. 169 | -- NOTE: An initial attempt at that (see nBitCodeLength below), is missing 170 | -- some vital ingredent. See also the CODE_FNENTRY generation. 171 | [ mkSymTabBlock (globalSymbols ++ functionSymbols ++ constantSymbols) ] ++ 172 | (map mkFunctionBlock mFns) ++ 173 | [] 174 | ] 175 | -- = pure $ mkBlock MODULE [ {- Record: Version 1 -} 176 | -- , {- Block: ParamAttrGroup 10 -} 177 | -- , {- Block: ParamAttr 9 -} 178 | -- , {- Block: Types 17 -} 179 | -- , {- Record: Triple 2 -} 180 | -- , {- Record: Datalayout 3 -} 181 | -- , {- Record: GlobalVar 7 -}, ... 182 | -- , {- Record: Function 8 -}, ... 183 | -- , {- Record: VSTOffset 13 -} 184 | -- , {- Block: Constants 11 -} 185 | -- , {- Block: Metadata 15 -}, ... 186 | -- , {- Block: Function 12 -} 187 | -- , {- Block: SymTab 14 -} 188 | -- ] 189 | where 190 | -------------------------------------------------------------------------- 191 | -- Compute the offsets 192 | identBlock = toBitCode i 193 | -- [TODO]: we can only write Version 1 Bitcode. 194 | -- version 2 with the symbol table might 195 | -- be easier. However version two has 196 | -- different record lengths. 197 | moduleHeader = [ mkRec MC.VERSION [(1 :: Int) {-mVersion-}] ] ++ 198 | toBitCode typeList ++ 199 | [ mkRec MC.TRIPLE t | Just t <- [mTriple] ] ++ 200 | [ mkRec MC.DATALAYOUT dl | Just dl <- [mDatalayout] ] ++ 201 | -- NOTE: If we ever want to reference functions or globals 202 | -- in constants (e.g. to set prefix data for functiondeclarations) 203 | -- they must be present when parsing the constants. Hence we 204 | -- generate globals and functions first; then constants. 205 | (map mkGlobalRec (map V.symbolValue globalSymbols)) ++ 206 | (map mkFunctionRec (map V.symbolValue functionSymbols)) ++ 207 | (mkConstBlock constantSymbols) 208 | -- nBitCodeLength :: [NBitCode] -> (Int,Int) 209 | -- nBitCodeLength nbc = evalBitCodeWriter $ (emitTopLevel . map denormalize $ nbc) >> ask 210 | 211 | -------------------------------------------------------------------------- 212 | -- Prelim discussion: 213 | -- mValues will contain Globals and Aliases. 214 | -- mDecls will contain prototype declarations. 215 | -- nFns will contain all the defined functions. 216 | -- 217 | -- The value list will then consist of Globals (Globals and Aliases), 218 | -- followed by prototypes and function definitions; finally we have the 219 | -- constants at the end. 220 | -- 221 | -- Constants however follow from the symbols. E.g. what ever is used 222 | -- in globals, aliases, prototypes and function will need to be part of 223 | -- the constants table. 224 | -------------------------------------------------------------------------- 225 | isDeclaration x 226 | | f@(V.Function{}) <- V.symbolValue x = V.feProto (V.fExtra f) 227 | | otherwise = False 228 | isFunction x 229 | | f@(V.Function{}) <- V.symbolValue x = not (V.feProto (V.fExtra f)) 230 | | otherwise = False 231 | isGlobal x 232 | | (V.Global{}) <- V.symbolValue x = True 233 | | otherwise = False 234 | isAlias x 235 | | (V.Alias{}) <- V.symbolValue x = True 236 | | otherwise = False 237 | isConstant x 238 | | (V.Constant{}) <- V.symbolValue x = True 239 | | otherwise = False 240 | 241 | -- * S Y M B O L S 242 | -- globals 243 | globalSymbols = filter isGlobal mValues 244 | -- TODO: aliases 245 | -- functions (declarations and definitions) 246 | functionSymbols = mDecls ++ mDefns 247 | 248 | -- all symbols; this should break them down so that we can construct them 249 | -- by successivly building on simple building blocks. 250 | -- constants 251 | constantSymbols = mConsts 252 | 253 | -- so the (module) valueList is as follows 254 | -- Order matters, as that is the order in which we put them into the module. 255 | -- 256 | -- TODO: FLAGS: if -dump-valuelist: 257 | -- traceShowWith prettyIndexed $! 258 | valueList :: Map V.Symbol Word64 259 | valueList = Map.fromList $ zip (globalSymbols ++ functionSymbols ++ constantSymbols) [0..] 260 | 261 | -- * T Y P E S 262 | -- all top level types, and all the types to construct them. (e.g. i8** -> i8, i8*, and i8**). 263 | -- topLevelTypes = foldl T.ftypes [] $ map ty flatSymbols 264 | -- all symbols of functions, their constants and bodies. 265 | -- fullFunctionSymbols = fsymbols [] mFns 266 | -- The type list now additionally also contains all types that are 267 | -- part of the function signatures, bodies and constants. 268 | -- 269 | -- TODO: FLAGS: if -dump-typelist: 270 | -- traceShowWith prettyIndexed $! 271 | -- typeListO = foldl T.ftypes topLevelTypes $ map ty fullFunctionSymbols 272 | -- typeList = traceShowWith (\x -> text "ORIGINAL:" 273 | -- $+$ prettyIndexed typeListO 274 | -- $+$ text "OPTIMIZED:" 275 | -- $+$ prettyIndexed x ) $! 276 | typeList = mTypes -- sortBy T.typeCompare (Set.toList mTypes) 277 | typeMap :: Map T.Ty Word64 278 | typeMap = Map.fromList (zip typeList [0..]) 279 | 280 | -- | Turn a set of Constant Values unto BitCode Records. 281 | mkConstBlock :: HasCallStack 282 | => [V.Symbol] -- ^ the constants to turn into BitCode 283 | -> [NBitCode] 284 | mkConstBlock consts | length consts > 0 = [ mkBlock CONSTANTS . 285 | concatMap f $! 286 | -- traceShow "groupBy" . groupBy ((==) `on` (V.cTy . V.symbolValue)) $ 287 | map pure $! 288 | consts ] 289 | | otherwise = [] 290 | where 291 | f :: [V.Symbol] -> [NBitCode] 292 | f [] = [] 293 | f (s:cs) 294 | | (V.Constant t c) <- V.symbolValue s 295 | = (mkRec CC.CST_CODE_SETTYPE (lookupTypeIndex typeMap t)):mkConstRec c:map (mkConstRec . V.cConst . V.symbolValue) cs 296 | | otherwise = error $ "Invalid constant " ++ show s 297 | mkConstRec :: HasCallStack => V.Const -> NBitCode 298 | mkConstRec V.Null = mkEmptyRec CC.CST_CODE_NULL 299 | mkConstRec V.Undef = mkEmptyRec CC.CST_CODE_UNDEF 300 | mkConstRec (V.Int n) = mkRec CC.CST_CODE_INTEGER (fromSigned n) 301 | mkConstRec (V.WideInt ns) = mkRec CC.CST_CODE_WIDE_INTEGER (map fromSigned ns) 302 | 303 | mkConstRec (V.Float (V.FpHalf f)) = mkRec CC.CST_CODE_FLOAT [fromIntegral f :: Word64] 304 | mkConstRec (V.Float (V.FpSingle f)) = mkRec CC.CST_CODE_FLOAT [fromIntegral f :: Word64] 305 | mkConstRec (V.Float (V.FpDouble f)) = mkRec CC.CST_CODE_FLOAT [f] 306 | -- TODO: Support aggregates (lookup value numbers in Constants? + Globals + Functions?) 307 | mkConstRec (V.Struct vals) 308 | | length vals > 0 = mkRec CC.CST_CODE_AGGREGATE ((map lookupSymbolIndex vals) :: [Word64]) 309 | | otherwise = mkRec CC.CST_CODE_NULL ([] :: [Word64]) 310 | mkConstRec (V.Array vals) 311 | | length vals > 0 = mkRec CC.CST_CODE_AGGREGATE ((map lookupSymbolIndex vals) :: [Word64]) 312 | | otherwise = mkRec CC.CST_CODE_NULL ([] :: [Word64]) 313 | mkConstRec (V.Vector vals) 314 | | length vals > 0 = mkRec CC.CST_CODE_AGGREGATE ((map lookupSymbolIndex vals) :: [Word64]) 315 | | otherwise = mkRec CC.CST_CODE_NULL ([] :: [Word64]) 316 | mkConstRec (V.String s) = mkRec CC.CST_CODE_STRING s 317 | mkConstRec (V.CString s) = mkRec CC.CST_CODE_CSTRING s 318 | mkConstRec (V.BinOp op lhs rhs) = mkRec CC.CST_CODE_CE_BINOP [ fromEnum' op 319 | , lookupSymbolIndex lhs 320 | , lookupSymbolIndex rhs 321 | ] 322 | -- NOTE: t is the type we want to cast to (e.g. the CurrentType); the encoded type however is that of the symbol 323 | mkConstRec (V.Cast t op s) = mkRec CC.CST_CODE_CE_CAST [fromEnum' op, lookupTypeIndex typeMap (vty s) 324 | , lookupSymbolIndex s] 325 | -- XXX Gep, Select, ExtractElt, InsertElt, ShuffleVec, Cmp, InlineAsm, ShuffleVecEx, 326 | mkConstRec (V.InboundsGep t symbls) 327 | = mkRec CC.CST_CODE_CE_INBOUNDS_GEP $ (lookupTypeIndex typeMap t):zip' (map (lookupTypeIndex typeMap . vty) symbls) 328 | (map lookupSymbolIndex symbls) 329 | mkConstRec x = error $ "mkConstRec: " ++ show x ++ " not yet implemented!" 330 | 331 | -- signedness encoding. 332 | -- see `toSigned` in FromBitCode. 333 | fromSigned :: (FiniteBits a, Ord a, Num a) => a -> a 334 | fromSigned v | v < 0 = 1 .|. shift (-v) 1 335 | | otherwise = shift v 1 336 | -- XXX BlockAddress, Data, InlineAsm 337 | zip' :: [a] -> [a] -> [a] 338 | zip' [] [] = [] 339 | zip' (h:t) (h':t') = h:h':zip' t t' 340 | 341 | bool :: (Integral a) => Bool -> a 342 | bool x = if x then 1 else 0 343 | 344 | fromEnum' :: (Enum a, Integral b) => a -> b 345 | fromEnum' = fromIntegral . fromEnum 346 | 347 | mkGlobalRec :: HasCallStack => V.Value -> NBitCode 348 | mkGlobalRec (V.Global{..}) = mkRec MC.GLOBALVAR [ lookupTypeIndex typeMap t -- NOTE: We store the pointee type. 349 | , bool gIsConst .|. shift explicitType 1 .|. shift gAddressSpace 2 350 | , fromMaybe 0 ((+1) . lookupSymbolIndex <$> gInit) 351 | , fromEnum' gLinkage 352 | , gParamAttrs 353 | , gSection 354 | , fromEnum' gVisibility 355 | , fromEnum' gThreadLocal 356 | , bool gUnnamedAddr 357 | , bool gExternallyInitialized 358 | , fromEnum' gDLLStorageClass 359 | , gComdat 360 | ] 361 | where (T.Ptr _ t) = gPointerType 362 | explicitType = 1 363 | 364 | mkFunctionRec :: HasCallStack => V.Value -> NBitCode 365 | mkFunctionRec (V.Function{..}) = mkRec MC.FUNCTION [ lookupTypeIndex typeMap t -- NOTE: Similar to Globals we store the pointee type. 366 | , fromEnum' fCallingConv 367 | , bool (V.feProto fExtra) 368 | , fromEnum' fLinkage 369 | , fParamAttrs 370 | , fAlignment 371 | , fSection 372 | , fromEnum' fVisibility 373 | , fGC 374 | , bool fUnnamedAddr 375 | , fromMaybe 0 ((+1) . lookupSymbolIndex <$> (V.fePrologueData fExtra)) 376 | , fromEnum' fDLLStorageClass 377 | , fComdat 378 | , fromMaybe 0 ((+1) . lookupSymbolIndex <$> (V.fePrefixData fExtra)) 379 | , fPersonalityFn 380 | ] 381 | where (T.Ptr _ t) = fType 382 | 383 | -------------------------------------------------------------------------- 384 | -- VALUE SYMBOL TABLE 385 | -- 386 | mkSymTabBlock :: HasCallStack => [V.Symbol] -> NBitCode 387 | -- TODO: drop `catMaybes`, once we support all symbols (FNENTRY, BBENTRY) 388 | mkSymTabBlock syms = mkBlock VALUE_SYMTAB (catMaybes (map mkSymTabRec namedIdxdSyms)) 389 | where namedIdxdSyms = [(idx, name, value) | (idx, (V.Named name _i _t value)) <- zip [0..] syms] 390 | mkSymTabRec :: (Int, String, V.Value) -> Maybe NBitCode 391 | mkSymTabRec (n, nm, (V.Function{..})) | V.feProto fExtra = Just (mkRec VST.VST_CODE_ENTRY (n:map fromEnum nm)) 392 | -- LLVM 3.8 comes with FNENTRY, which has offset at the 393 | -- second position. This however requires computing the 394 | -- offset corret. 395 | -- XXX: VST OFFSETS 396 | | otherwise = Just (mkRec VST.VST_CODE_ENTRY (n:map fromEnum nm)) 397 | -- Just (mkRec VST.VST_CODE_FNENTRY (n:offset-1:map fromEnum nm)) 398 | -- where offset = fst . nBitCodeLength $ [ mkBlock MODULE $ moduleHeader ] 399 | -- XXX: this is ok here, as anything else can just be named constants/globals. 400 | -- We simply can not encounter blocks just yet. 401 | mkSymTabRec (n, nm, _) = Just (mkRec VST.VST_CODE_ENTRY (n:map fromEnum nm)) 402 | -------------------------------------------------------------------------- 403 | -- FUNCTIONS (BasicBlocks) 404 | -- 405 | mkFunctionBlock :: HasCallStack => Function -> NBitCode 406 | {- Declare blocks, constants, instructions, vst -} 407 | mkFunctionBlock (Function sig consts bbs) 408 | = mkBlock FUNCTION $ 409 | [ mkRec FC.DECLAREBLOCKS (length bbs) ] ++ 410 | mkConstBlock fconstants ++ 411 | -- this is a *bit* ugly. 412 | -- We use a fold to carry the instruction count through 413 | -- the record creation. We also prepend records and hence 414 | -- have to reverse them in the end. 415 | -- TODO: Use better mappendable DataStructure. 416 | (reverse . snd $ foldl' mkInstRecFold (0,[]) (concatMap blockInstructions bbs)) 417 | where -- function arguments 418 | fArgTys = funParamTys (V.fType (V.symbolValue sig)) 419 | nArgs :: Word64 420 | nArgs = fromIntegral (length fArgTys) 421 | fArgs = zipWith (\t -> V.mkUnnamed t . V.Arg t) fArgTys [0..] 422 | -- function local constant 423 | fconstants :: [V.Symbol] 424 | fconstants = sortBy (T.typeCompare `on` (V.cTy . V.symbolValue)) 425 | -- ignore any constants that are available globally already 426 | . filter (\x -> not $ x `elem` constantSymbols) 427 | -- only constants 428 | . filter isConst 429 | $ consts 430 | isConst :: V.Symbol -> Bool 431 | isConst c | (V.Constant{}) <- V.symbolValue c = True 432 | | otherwise = False 433 | -- the values the body can reference. 434 | 435 | -- globals, functions, constants, (because we order them that way) 436 | -- plus fargs and fconstants per function body ontop of which are 437 | -- the references generated by the instructions will be placed. 438 | bodyVals :: Map V.Symbol Word64 439 | bodyVals = Map.unionWith (error . show) valueList (Map.fromList $ zip (fArgs ++ fconstants) ([(fromIntegral $ Map.size valueList)..] :: [Word64])) 440 | nValueList :: Word64 441 | nValueList = fromIntegral (Map.size valueList) 442 | 443 | blockInstructions :: HasCallStack => BasicBlock -> [I.Inst] 444 | blockInstructions (BasicBlock insts) = map snd insts 445 | blockInstructions (NamedBlock _ insts) = map snd insts 446 | 447 | -- instruction values (e.g. values generated by instructions) 448 | instVals = zipWith (\t -> V.mkUnnamed t . V.TRef t) [t | Just t <- map instTy (concatMap blockInstructions bbs)] [0..] 449 | 450 | -- These are in FromBitCode as well. TODO: Refactor move BitMasks into a common file. 451 | inAllocMask = shift (1 :: Int) 5 452 | explicitTypeMask = shift (1 :: Int) 6 453 | swiftErrorMask = shift (1 :: Int) 7 454 | 455 | -- Relative Symbol lookup 456 | lookupRelativeSymbolIndex :: (HasCallStack) 457 | => Word64 -- ^ current instruction count 458 | -> V.Symbol -- ^ the symbol to lookup 459 | -> Word64 460 | lookupRelativeSymbolIndex iN s = case V.symbolIndexType s of 461 | V.GlobI -> nValueList + nArgs + iN - V.symbolIndexValue s 462 | V.ArgI -> nArgs + iN - V.symbolIndexValue s 463 | V.InstI -> iN - V.symbolIndexValue s 464 | 465 | lookupRelativeSymbolIndex' :: (HasCallStack) => Word64 -> V.Symbol -> Word64 466 | lookupRelativeSymbolIndex' = lookupRelativeSymbolIndex 467 | 468 | -- Build instructions. 469 | mkInstRec :: HasCallStack => Word64 -> I.Inst -> NBitCode 470 | mkInstRec n (I.BinOp _ op lhs rhs flags) = mkRec FC.INST_BINOP [ lookupRelativeSymbolIndex' n lhs 471 | , lookupRelativeSymbolIndex' n rhs 472 | , fromEnum' op 473 | , fromIntegral $ foldl' setBit (0 :: Int) (map flagValue flags) 474 | ] 475 | -- TODO: There should be valueTypePair :: Int -> Symbol -> [_] 476 | -- which encodes a Symbol that has an index < n just 477 | -- as the symbol, and otherwise as valueNumber, TypeIndex. 478 | -- The `getValueType` function in the reader does precisely this 479 | -- in the inverse way. 480 | mkInstRec n (I.Cast t op s) = mkRec FC.INST_CAST [ lookupRelativeSymbolIndex' n s 481 | -- , lookupTypeIndex typeList (ty s) 482 | , lookupTypeIndex typeMap t 483 | , fromEnum' op 484 | ] 485 | -- TODO: If we want to support InAlloca, we need to extend Alloca. For now we will not set the flag. 486 | mkInstRec n (I.Alloca t s a) = mkRec FC.INST_ALLOCA [ lookupTypeIndex typeMap (lower t) 487 | , lookupTypeIndex typeMap . vty $ s 488 | , lookupSymbolIndex s 489 | , fromIntegral $ explicitTypeMask .|. bitWidth a 490 | ] 491 | -- TODO: Support Volatile flag 492 | -- Verify that t is (lower (ty s)). 493 | mkInstRec n (I.Load _ s a) = mkRec FC.INST_LOAD [ lookupRelativeSymbolIndex' n s 494 | , lookupTypeIndex typeMap . lower . vty $ s 495 | , fromIntegral (bitWidth a) 496 | , 0 497 | ] 498 | -- TODO: Support Volatile flag 499 | mkInstRec n (I.Store ref val a) = mkRec FC.INST_STORE [ lookupRelativeSymbolIndex' n ref 500 | , lookupRelativeSymbolIndex' n val 501 | , fromIntegral (bitWidth a) 502 | , 0 503 | ] 504 | 505 | -- TODO: Support FMF and Explicit Type flags explicitly 506 | -- XXX: Call needs paramAttrs! -- Can use 0 for empty param set. 507 | mkInstRec n (I.Call _ tck cc s fnTy args) 508 | | ty (V.symbolValue s) /= ty s = error $ unlines [ "Error in Call Instruction (ty (symbolValue s) /= ty s)" 509 | , "for symbol: " ++ show s 510 | , "type of symbol (callee) value: " 511 | , show (ty (V.symbolValue s)) 512 | , "type of symbol (callee): " 513 | , show (ty s) 514 | , "function type (caller):" 515 | , show fnTy 516 | , "arguments (" ++ show (length args) ++ ")" 517 | , show args 518 | ] 519 | | ty (V.symbolValue s) /= fnTy = error $ unlines [ "Callees Symbols " 520 | , show s 521 | , "type mismatch; symbolValue ty" 522 | , show (ty (V.symbolValue s)) 523 | , "function signature type" 524 | , show fnTy 525 | , "arguments (" ++ show (length args) ++ ")" 526 | , show args 527 | ] 528 | | otherwise = mkRec FC.INST_CALL $ [ (0 :: Word64) -- Fix PARAMATTR 529 | , cconv .|. tcKind .|. explTy 530 | -- FMF 531 | , lookupTypeIndex typeMap (lower fnTy) 532 | , lookupRelativeSymbolIndex' n s 533 | ] ++ map (lookupRelativeSymbolIndex' n) args 534 | where cconv = shift (fromEnum' cc) (fromEnum CALL_CCONV) 535 | explTy = setBit 0 (fromEnum' CALL_EXPLICIT_TYPE) 536 | tcKind = case tck of 537 | I.None -> 0 538 | I.Tail -> setBit 0 (fromEnum' CALL_TAIL) 539 | I.MustTail -> setBit 0 (fromEnum' CALL_MUSTTAIL) 540 | I.NoTail -> setBit 0 (fromEnum' CALL_NOTAIL) 541 | 542 | mkInstRec n (I.Cmp2 _ lhs rhs pred) = mkRec FC.INST_CMP2 [ lookupRelativeSymbolIndex' n lhs 543 | , lookupRelativeSymbolIndex' n rhs 544 | , fromEnum' pred 545 | ] 546 | 547 | mkInstRec n (I.Gep ty inbounds base idxs) = mkRec FC.INST_GEP $ [ (bool inbounds) :: Word64 548 | , lookupTypeIndex typeMap (lower ty)] 549 | ++ map (lookupRelativeSymbolIndex' n) (base:idxs) 550 | 551 | mkInstRec n (I.Ret (Just val)) = mkRec FC.INST_RET [ lookupRelativeSymbolIndex' n val :: Word64 ] 552 | mkInstRec n (I.Ret Nothing) = mkEmptyRec FC.INST_RET 553 | mkInstRec n (I.UBr bbId) = mkRec FC.INST_BR [bbId] 554 | mkInstRec n (I.Br val bbId bbId') = mkRec FC.INST_BR [ bbId 555 | , bbId' 556 | , lookupRelativeSymbolIndex' n val 557 | ] 558 | mkInstRec n (I.Fence order scope) 559 | = mkRec FC.INST_FENCE [ fromEnum' order :: Int 560 | , fromEnum' scope 561 | ] 562 | mkInstRec n (I.CmpXchg ptr cmp new {- _vol -} order scope failOrder {- _weak -}) 563 | = mkRec FC.INST_CMPXCHG [ lookupRelativeSymbolIndex' n ptr 564 | , lookupRelativeSymbolIndex' n cmp 565 | , lookupRelativeSymbolIndex' n new 566 | , 0 567 | , fromEnum' order 568 | , fromEnum' scope 569 | , fromEnum' failOrder 570 | , 0 571 | ] 572 | mkInstRec n (I.AtomicRMW ptr cmp op {- _vol -} order scope) 573 | = mkRec FC.INST_ATOMICRMW [ lookupRelativeSymbolIndex' n ptr 574 | , lookupRelativeSymbolIndex' n cmp 575 | , fromEnum' op 576 | , 0 577 | , fromEnum' order 578 | , fromEnum' scope 579 | ] 580 | mkInstRec n (I.AtomicStore ptr val align {- _vol -} order scope) 581 | = mkRec FC.INST_STOREATOMIC [ lookupRelativeSymbolIndex' n ptr 582 | , lookupRelativeSymbolIndex' n val 583 | , fromIntegral (bitWidth align) 584 | , 0 585 | , fromEnum' order 586 | , fromEnum' scope 587 | ] 588 | mkInstRec n (I.AtomicLoad _ ptr align {- _vol -} order scope) 589 | = mkRec FC.INST_LOADATOMIC [ lookupRelativeSymbolIndex' n ptr 590 | , lookupTypeIndex typeMap . lower . vty $ ptr 591 | , fromIntegral (bitWidth align) 592 | , 0 593 | , fromEnum' order 594 | , fromEnum' scope 595 | ] 596 | mkInstRec n (I.Switch val bbId bbIds) 597 | = mkRec FC.INST_SWITCH $ [ lookupTypeIndex typeMap (vty val) 598 | , lookupRelativeSymbolIndex' n val 599 | , bbId 600 | ] 601 | ++ concat [ [ lookupSymbolIndex val 602 | , bbId ] 603 | | (val, bbId) <- bbIds 604 | ] 605 | 606 | mkInstRec n (I.ExtractValue val idxs) 607 | = mkRec FC.INST_EXTRACTVAL $ [ lookupRelativeSymbolIndex' n val ] ++ idxs 608 | 609 | mkInstRec n i = error $ "Instruction " ++ (show i) ++ " not yet supported." 610 | -- Fold helper to keep track of the instruction count. 611 | mkInstRecFold :: HasCallStack => (Word64, [NBitCode]) -> I.Inst -> (Word64, [NBitCode]) 612 | mkInstRecFold (n, codes) inst = case instTy inst of 613 | Just _ -> (n+1,mkInstRec n inst:codes) 614 | Nothing -> (n, mkInstRec n inst:codes) 615 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Type.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | 3 | module Data.BitCode.LLVM.Type where 4 | 5 | import GHC.Generics (Generic) 6 | import Data.Binary (Binary) 7 | 8 | import Data.Word (Word64) 9 | 10 | -- * Types 11 | data Ty 12 | = NumEntry Word64 13 | | Void | Float | Double | Label | Opaque { oName :: String } 14 | | Int { teWidth :: Word64 } 15 | | Ptr { teAddressSpace :: Word64, tePointeeTy :: Ty } 16 | -- | FnOld 17 | | Half 18 | | Array { teNumElts :: Word64, teEltTy :: Ty } 19 | | Vector { teNumElts :: Word64, teEltTy :: Ty } 20 | | X86Fp80 21 | | Fp128 22 | | Metadata 23 | | X86Mmx 24 | | StructAnon { teAnonIsPacked :: Bool, teAnonEltTy :: [Ty] } 25 | | StructNamed { teName :: String, teNamedIsPacked :: Bool, teNamedEltTy :: [Ty] } 26 | | Function { teVarArg :: Bool, teRetTy :: Ty, teParamTy :: [Ty] } 27 | | Token 28 | deriving (Show, Ord, Eq, Generic) 29 | 30 | instance Binary Ty 31 | 32 | orderIdx :: Ty -> Int 33 | orderIdx (NumEntry{}) = 0 34 | orderIdx Void = 1 35 | orderIdx Float = 2 36 | orderIdx Double = 3 37 | orderIdx Label = 4 38 | orderIdx (Opaque{}) = 5 39 | orderIdx (Int{}) = 6 40 | orderIdx Half = 7 41 | orderIdx (Array{}) = 8 42 | orderIdx (Vector{}) = 9 43 | orderIdx X86Fp80 = 10 44 | orderIdx Fp128 = 11 45 | orderIdx Metadata = 12 46 | orderIdx X86Mmx = 13 47 | orderIdx StructAnon{} = 14 48 | orderIdx StructNamed{}= 15 49 | orderIdx Function{} = 16 50 | orderIdx Token = 17 51 | orderIdx (Ptr{}) = 18 52 | 53 | isComplex :: Ty -> Bool 54 | isComplex (Ptr{}) = True 55 | isComplex (Array{}) = True 56 | isComplex (Vector{}) = True 57 | isComplex (StructAnon{}) = True 58 | isComplex (StructNamed{}) = True 59 | isComplex (Function{}) = True 60 | isComplex _ = False 61 | 62 | isPrimitive :: Ty -> Bool 63 | isPrimitive = not . isComplex 64 | 65 | isPtr :: Ty -> Bool 66 | isPtr (Ptr{}) = True 67 | isPtr _ = False 68 | 69 | typeCompare :: Ty -> Ty -> Ordering 70 | typeCompare x y | x == y = EQ 71 | | isPrimitive x && isPrimitive y = x `compare` y 72 | -- primitives first 73 | | isPrimitive x && isComplex y = LT 74 | | isComplex x && isComplex y && length (subTypes x) < length (subTypes y) = LT 75 | | isComplex x && isComplex y && x `elem` (subTypes y) = LT 76 | | isComplex x && isComplex y && and ((map isLtEq (subTypes x)) <*> subTypes y) = LT 77 | | otherwise = GT 78 | where isLtEq x y = not (GT == typeCompare x y) 79 | 80 | subTypes :: Ty -> [Ty] 81 | subTypes (Ptr _ t) = t:subTypes t 82 | subTypes (Array _ t) = t:subTypes t 83 | subTypes (Vector _ t) = t:subTypes t 84 | subTypes (StructAnon _ ts) = concatMap (\t -> t:subTypes t) ts 85 | subTypes (StructNamed _ _ ts) = concatMap (\t -> t:subTypes t) ts 86 | subTypes (Function _ r pt) = concatMap (\t -> t:subTypes t) (r:pt) 87 | subTypes _ = [] 88 | 89 | ftypes :: [Ty] -> Ty -> [Ty] 90 | ftypes tys t | t `elem` tys = tys 91 | ftypes tys t = case t of 92 | (Ptr _ t') -> ftypes tys t' ++ [t] 93 | (Array _ t') -> ftypes tys t' ++ [t] 94 | (Vector _ t') -> ftypes tys t' ++ [t] 95 | (StructAnon _ ts) -> foldl ftypes tys ts ++ [t] 96 | (StructNamed _ _ ts) -> foldl ftypes tys ts ++ [t] 97 | (Function _ r pt) -> foldl ftypes tys (r:pt) ++ [t] 98 | _ -> t:tys 99 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Types.hs: -------------------------------------------------------------------------------- 1 | module Data.BitCode.LLVM.Types where 2 | 3 | import Data.Word (Word64) 4 | 5 | -- * Functions, Instructions, Basic Blocks 6 | type Align = Word64 7 | type Label = String 8 | type BasicBlockId = Word64 9 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Util.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fprof-auto #-} 2 | module Data.BitCode.LLVM.Util where 3 | 4 | import Text.PrettyPrint 5 | import Data.BitCode.LLVM.Classes.HasType 6 | import Data.BitCode.LLVM.Pretty 7 | 8 | import qualified Data.BitCode.LLVM.Type as Ty 9 | import qualified Data.BitCode.LLVM.Value as Val 10 | import qualified Data.BitCode.LLVM.Instruction as Inst 11 | 12 | import GHC.Stack 13 | 14 | import Debug.Trace 15 | 16 | isPtr :: HasCallStack => Ty.Ty -> Bool 17 | isPtr (Ty.Ptr{}) = True 18 | isPtr _ = False 19 | 20 | isInt :: HasCallStack => Ty.Ty -> Bool 21 | isInt (Ty.Int{}) = True 22 | isInt _ = False 23 | 24 | isArray :: HasCallStack => Ty.Ty -> Bool 25 | isArray (Ty.Array{}) = True 26 | isArray _ = False 27 | 28 | isVector :: HasCallStack => Ty.Ty -> Bool 29 | isVector (Ty.Vector{}) = True 30 | isVector _ = False 31 | 32 | isStruct :: HasCallStack => Ty.Ty -> Bool 33 | isStruct (Ty.StructAnon{}) = True 34 | isStruct (Ty.StructNamed{}) = True 35 | isStruct _ = False 36 | 37 | isFunction :: HasCallStack => Ty.Ty -> Bool 38 | isFunction (Ty.Function{}) = True 39 | isFunction _ = False 40 | 41 | isFunctionPtr :: HasCallStack => Ty.Ty -> Bool 42 | isFunctionPtr t = (isPtr t) && (isFunction (lower t)) 43 | 44 | -- | Checks if a type is of tyep @i1@. LLVM requires 45 | -- types to be i1 (bool) in binary conditonal position. 46 | isBoolTy :: HasCallStack => Val.Symbol -> Bool 47 | isBoolTy t | ty t == Ty.Int 1 = True 48 | | otherwise = False 49 | 50 | lift :: HasCallStack => Ty.Ty -> Ty.Ty 51 | lift t = Ty.Ptr 0 t 52 | 53 | lower :: HasCallStack => Ty.Ty -> Ty.Ty 54 | lower (Ty.Ptr _ t) = t 55 | lower t = error . show $ text "Type:" <+> pretty t <+> text "cannot be lowerd." 56 | 57 | elemTy :: HasCallStack => Ty.Ty -> Ty.Ty 58 | elemTy (Ty.Array _ t) = t 59 | elemTy (Ty.Vector _ t) = t 60 | elemTy t = error . show $ text "Type:" <+> pretty t <+> text "does not have an element type." 61 | 62 | elemTys :: HasCallStack => Ty.Ty -> [Ty.Ty] 63 | elemTys (Ty.StructAnon _ ts) = ts 64 | elemTys (Ty.StructNamed _ _ ts) = ts 65 | elemTys t = error . show $ text "Type:" <+> pretty t <+> text "does not have element types." 66 | 67 | funRetTy :: HasCallStack => Ty.Ty -> Ty.Ty 68 | funRetTy (Ty.Ptr _ (Ty.Function _ t _)) = t 69 | funRetTy t = error . show $ text "Type:" <+> pretty t <+> parens (text (show t)) <+> text "is not a function type." 70 | 71 | funParamTys :: HasCallStack => Ty.Ty -> [Ty.Ty] 72 | funParamTys (Ty.Ptr _ (Ty.Function _ _ ts)) = ts 73 | funParamTys t = error . show $ text "Type:" <+> pretty t <+> parens (text (show t)) <+> text "is not a function type." 74 | 75 | -- TODO: Compute instruction type from the symbol! 76 | -- TODO: make this an HasType for Instructions. Use error where invalid; and have a 77 | -- separate function @hasResult@. (Or use undef?) As this is used in the tref 78 | -- computation fold. 79 | instTy :: HasCallStack => Inst.Inst -> Maybe Ty.Ty 80 | instTy (Inst.Alloca t _ _) = Just t 81 | instTy (Inst.Cast t _ _) = Just t 82 | instTy (Inst.Load t _ _) = Just t 83 | instTy (Inst.Store{}) = Nothing 84 | -- TODO: Maybe this should be at the callStie of instTy? 85 | -- Instead of it being here. 86 | instTy (Inst.Call t _ _ _ _ _ ) | t == Ty.Void = Nothing 87 | | otherwise = Just t 88 | instTy (Inst.Ret{}) = Nothing 89 | instTy (Inst.UBr{}) = Nothing 90 | instTy (Inst.Br{}) = Nothing 91 | instTy (Inst.Switch{}) = Nothing 92 | instTy (Inst.Cmp2 t _ _ _) = Just t 93 | instTy (Inst.BinOp t _ _ _ _) = Just t 94 | instTy (Inst.CmpXchg p _ _ _ _ _) = Just (Ty.StructAnon False [(lower (ty p)), Ty.Int 1]) -- { lower ty, i1 } 95 | instTy (Inst.Fence{}) = Nothing 96 | instTy (Inst.AtomicRMW p _ _ _ _) = Just (lower (ty p)) 97 | instTy (Inst.AtomicStore{}) = Nothing 98 | instTy (Inst.AtomicLoad t _ _ _ _) = Just t 99 | 100 | -- GEP returns a pointer to it's type. 101 | instTy (Inst.Gep bt _ s idxs) | bt == ty s = Just $ lift $ drill (ty s) idxs 102 | | otherwise = error $ "Broken getElementPointer. Basetype: " ++ show bt ++ " and value type type: " ++ show (lower (ty s)) ++ " don't match!" 103 | instTy (Inst.ExtractValue s idxs) = Just $ drill' (ty s) (map mkI32Val idxs) 104 | where mkI32Val = Val.Constant (Ty.Int 32) . Val.Int . fromIntegral 105 | 106 | -- instTy i = error $ "No instTy for instruction: " ++ show i 107 | 108 | 109 | -- TODO: Support Vector indexes. 110 | drill :: HasCallStack => Ty.Ty -> [Val.Symbol] -> Ty.Ty 111 | drill t = drill' t . map Val.symbolValue 112 | drill' :: HasCallStack => Ty.Ty -> [Val.Value] -> Ty.Ty 113 | drill' t [] = t 114 | drill' (Ty.Array _ t) (Val.Constant (Ty.Int{}) (Val.Int _):vs) = drill' t vs 115 | drill' t@(Ty.Array _ _) (idx:_) = error $ "Cannot drill into " ++ show (pretty t) ++ " with " ++ show (pretty idx) 116 | drill' (Ty.Vector _ t) (Val.Constant (Ty.Int{}) (Val.Int _):vs) = drill' t vs 117 | drill' t@(Ty.Vector _ _) (idx:_) = error $ "Cannot drill into " ++ show t ++ " with " ++ show idx 118 | drill' (Ty.Ptr _ t) (Val.Constant (Ty.Int{}) (Val.Int _):vs) = drill' t vs 119 | drill' t@(Ty.Ptr _ _) (idx:_) = error $ "Cannot drill into " ++ show t ++ " with " ++ show idx 120 | drill' t@(Ty.StructAnon _ tys) (Val.Constant (Ty.Int 32) (Val.Int n):vs) 121 | | 0 <= n && n < length tys = drill' (tys !! n) vs 122 | | otherwise = error $ "Cannot drill into struct " ++ show t ++ " with " ++ show n ++ "; index out of bounds!" 123 | drill' t@(Ty.StructAnon _ _) (idx:_) = error $ "Can only drill into struct " ++ show t ++ " with int32, " ++ show idx ++ " given." 124 | drill' t@(Ty.StructNamed _ _ tys) (Val.Constant (Ty.Int 32) (Val.Int n):vs) 125 | | 0 <= n && n < length tys = drill' (tys !! n) vs 126 | | otherwise = error $ "Cannot drill into struct " ++ show t ++ " with " ++ show n ++ "; index out of bounds!" 127 | drill' t@(Ty.StructNamed _ _ _) (idx:_) = error $ "Can only drill into struct " ++ show t ++ " with int32, " ++ show idx ++ " given." 128 | 129 | -- The terminator instructions are: ‘ret‘, ‘br‘, ‘switch‘, ‘indirectbr‘, ‘invoke‘, ‘resume‘, ‘catchswitch‘, ‘catchret‘, ‘cleanupret‘, and ‘unreachable‘. 130 | isTerminator :: HasCallStack => Inst.Inst -> Bool 131 | isTerminator (Inst.Ret{}) = True 132 | isTerminator (Inst.UBr{}) = True 133 | isTerminator (Inst.Br{}) = True 134 | isTerminator (Inst.Switch{}) = True 135 | isTerminator _ = False 136 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Value.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RecordWildCards #-} 2 | {-# LANGUAGE DeriveGeneric #-} 3 | {-# LANGUAGE BangPatterns #-} 4 | {-# LANGUAGE StandaloneDeriving #-} 5 | {-# LANGUAGE FlexibleInstances #-} 6 | module Data.BitCode.LLVM.Value where 7 | 8 | import Data.Word (Word16, Word32, Word64) 9 | import Data.BitCode.LLVM.Type (Ty) 10 | import Data.BitCode.LLVM.Linkage (Linkage(..)) 11 | import Data.BitCode.LLVM.Visibility (Visibility) 12 | import Data.BitCode.LLVM.ThreadLocalMode (ThreadLocalMode) 13 | import Data.BitCode.LLVM.StorageClass (DLLStorageClass) 14 | import Data.BitCode.LLVM.CallingConv (CallingConv) 15 | import Data.BitCode.LLVM.Opcodes.Cast (CastOp) 16 | import Data.BitCode.LLVM.Opcodes.Binary (BinOp) 17 | 18 | import Data.BitCode.LLVM.Classes.HasType 19 | 20 | import GHC.Generics (Generic) 21 | import Data.Binary (Binary) 22 | 23 | import GHC.Stack (HasCallStack) 24 | 25 | import qualified Debug.Trace as Trace 26 | 27 | trace :: String -> a -> a 28 | trace _ x = x 29 | -- trace = Trace.trace 30 | traceM :: Applicative f => String -> f () 31 | traceM _ = pure () 32 | 33 | 34 | -- | Just a reference. 35 | type Ref = Int 36 | 37 | data FpValue 38 | = FpHalf !Word16 -- ^ IEEEhalf 39 | | FpSingle !Word32 -- ^ IEEEsingle 40 | | FpDouble !Word64 -- ^ IEEEdouble 41 | | FpDoubleExt !(Word64, Word64) -- ^ X86 Double Extended (80) 42 | | FpQuad !(Word64, Word64) -- ^ IEEEquad 43 | | FpDoubleDouble !(Word64, Word64) -- ^ PPCDoubleDouble 44 | deriving (Show, Eq, Ord, Generic) 45 | 46 | -- | Const `types`. These are usually wrapped in a Constant. 47 | -- which carries their type. 48 | data Const 49 | = Null 50 | | Undef 51 | | Int !Int -- ^ [intval] 52 | | WideInt ![Int] -- ^ [n x intval] 53 | | Float !FpValue -- ^ [fpval] 54 | -- aggregate constants. Undef is also an aggregate. 55 | | Array [Symbol] -- ^ aggregate 56 | | Vector [Symbol] -- ^ aggregate 57 | | Struct [Symbol] -- ^ aggregate 58 | | String !String 59 | | CString !String 60 | | BinOp !BinOp Symbol Symbol -- ^ [opcode, opval, opval] 61 | | Cast Ty !CastOp Symbol -- ^ [opcode, opty, opval] 62 | | Gep ![Word64] -- ^ [n x operands] 63 | | Select !Word64 !Word64 !Word64 -- ^ [opval, opval, opval] 64 | | ExtractElt !Word64 !Word64 !Word64 -- ^ [opty, opval, opval] 65 | | InsertElt !Word64 !Word64 !Word64 -- ^ [opval, opval, opval] 66 | | ShuffleVec !Word64 !Word64 !Word64 -- ^ [opval, opval, opval] 67 | | Cmp !Word64 !Word64 !Word64 !Word64 -- ^ [opty, opval, opval, pred] 68 | -- | InlineAsm -- TODO 69 | | ShuffleVecEx !Word64 !Word64 !Word64 !Word64 -- ^ [opty, opval, opval, opval] 70 | | InboundsGep Ty [Symbol] -- ^ [[ty,] opty, opval,...]; if ty defaults to nullptr. 71 | | BlockAddress !Word64 !Word64 !Word64 -- ^ [fnty, fnval, bb#] 72 | | Data ![Word64] -- ^ [n x elements] 73 | -- | InlineAsm -- TODO 74 | deriving (Show, Eq, Ord, Generic) 75 | 76 | data FunctionExtra 77 | = FE 78 | { feProto :: !Bool -- ^ Non-zero if this entry represents a declaration rather than a definition 79 | , fePrologueData :: (Maybe Symbol) -- ^ If non-zero, the value index of the prologue data for this function, plus 1. 80 | , fePrefixData :: (Maybe Symbol) -- ^ If non-zero, the value index of the prefix data for this function, plus 1. 81 | } 82 | deriving (Show, Generic) 83 | 84 | -- Function Extras are ignored 85 | -- for Ord and Eq 86 | instance Ord FunctionExtra where 87 | x <= y = True 88 | 89 | instance Eq FunctionExtra where 90 | x == y = True 91 | 92 | -- | Values the ValueList may contain. 93 | data Value 94 | -- | global variables 95 | = Global 96 | { gPointerType :: Ty -- ^ The type index of the pointer type used to point to this global variable 97 | , gIsConst :: Bool -- ^ Non-zero if the variable is treated as constant within the module, or zero if it is not 98 | , gAddressSpace :: Word64 99 | , gInit :: (Maybe Symbol) -- ^ If non-zero, the value index of the initializer for this variable, plus 1. 100 | , gLinkage :: Linkage 101 | , gParamAttrs :: Word64 102 | -- , gAlignment :: Word64 -- ^ The logarithm base 2 of the variable's requested alignment, plus 1 103 | -- TODO: turn this into a Maybe 104 | , gSection :: Word64 -- ^ If non-zero, the 1-based section index in the table of @MODULE_SECTION_NAME@. 105 | , gVisibility :: Visibility -- ^ If present, an encoding of the visibility of this variable 106 | , gThreadLocal :: ThreadLocalMode -- ^ If present, an encoding of the thread local storage mode of the variable 107 | , gUnnamedAddr :: Bool -- ^ If present and non-zero, indicates that the variable has @unnamed_addr@ 108 | , gExternallyInitialized :: Bool 109 | , gDLLStorageClass :: DLLStorageClass -- ^ If present, an encoding of the DLL storage class of this variable 110 | , gComdat :: Word64 -- ??? 111 | } 112 | -- | function values 113 | | Function 114 | { fType :: Ty -- ^ The type index of the function type describing this function 115 | , fCallingConv :: CallingConv 116 | , fLinkage :: Linkage 117 | , fParamAttrs :: Word64 -- ^ If nonzero, the 1-based parameter attribute index into the table of @PARAMATTR_CODE_ENTRY@ entries. 118 | , fAlignment :: Word64 119 | , fSection :: Word64 -- ^ If non-zero, the 1-based section index in the table of @MODULE_CODE_SECTIONNAME@ entries. 120 | , fVisibility :: Visibility 121 | , fGC :: Word64 -- ^ If present and nonzero, the 1-based garbage collector index in the table of @MODULE_CODE_GCNAME@ entries. 122 | , fUnnamedAddr :: Bool -- ^ If present and non-zero, indicates that the function has @unnamed_addr@. 123 | , fDLLStorageClass :: DLLStorageClass -- ^ An encoding of the DLL storage class of this function. 124 | , fComdat :: Word64 -- ^ An encoding of the COMDAT of this function 125 | , fPersonalityFn :: Word64 -- ^ If non-zero, the value index of the personality function for this function, plus 1. 126 | , fExtra :: FunctionExtra 127 | } 128 | -- | The @ALIAS@ record (code 9) marks the definition of an alias. 129 | | Alias 130 | { aType :: Ty -- ^ The type index of the alias 131 | , aAddrSpace :: Word64 132 | , aVal :: Symbol -- ^ The value index of the aliased value 133 | , aLinkage :: Linkage 134 | , aVisibility :: Visibility 135 | , aThreadLocal :: ThreadLocalMode -- ^ If present, an encoding of the thread local storage mode of the variable 136 | , aUnnamedAddr :: Bool -- ^ If present and non-zero, indicates that the function has @unnamed_addr@. 137 | , aDLLStorageClass :: DLLStorageClass 138 | } 139 | | Constant { cTy :: Ty, cConst :: Const } -- ^ constant values 140 | | Arg Ty Ref -- ^ function arguments, within function bodies 141 | | Value Ty -- ^ function values, from instructions 142 | -- | Typed reference 143 | | TRef Ty Ref -- ^ typed references are generated by instructions. 144 | -- Forward References; this is somewhat ugly :( 145 | | FwdRef Word64 146 | deriving (Show, Eq, Ord, Generic) 147 | 148 | fIsProto :: Value -> Bool 149 | fIsProto = feProto . fExtra 150 | 151 | fPrologueData :: Value -> Maybe Symbol 152 | fPrologueData = fePrologueData . fExtra 153 | 154 | fPrefixData :: Value -> Maybe Symbol 155 | fPrefixData = fePrefixData . fExtra 156 | 157 | class HasLinkage a where 158 | getLinkage :: a -> Linkage 159 | setLinkage :: Linkage -> a -> a 160 | 161 | external, private, internal :: (HasCallStack, HasLinkage a) => a -> a 162 | external = setLinkage External 163 | private = setLinkage Private 164 | internal = setLinkage Private 165 | 166 | mutable, immutable :: HasCallStack => Value -> Value 167 | mutable x = x { gIsConst = False } 168 | immutable x = x { gIsConst = True } 169 | 170 | 171 | instance HasLinkage Value where 172 | getLinkage (Global{gLinkage = l}) = l 173 | getLinkage (Function{fLinkage = l}) = l 174 | getLinkage (Alias{aLinkage = l}) = l 175 | getLinkage other = error $ show other ++ " has no linkage!" 176 | 177 | setLinkage l v = trace "[HasLinkage Value]" $ case v of 178 | g@(Global{}) -> g { gLinkage = l } 179 | f@(Function{}) -> f { fLinkage = l } 180 | a@(Alias{}) -> a { aLinkage = l } 181 | other -> error $ show other ++ " has no linkage!" 182 | 183 | 184 | instance HasType Value where 185 | ty v = trace "[HasType Value]" $ case v of 186 | (Global{..}) -> gPointerType 187 | (Function{..}) -> fType 188 | (Alias{..}) -> aType 189 | (Constant t _) -> t 190 | (Arg t _) -> t 191 | (Value t) -> t 192 | (TRef t _) -> t 193 | 194 | data IndexType = GlobI | ArgI | InstI 195 | deriving (Show, Eq, Ord) 196 | 197 | data Indexed a = Indexed IndexType (Int -> a) 198 | 199 | instance Show a => Show (Indexed a) 200 | where show _ = "..." 201 | 202 | type Index = Indexed Word64 203 | 204 | instance Eq (Indexed a) where 205 | (Indexed t _) == (Indexed t' _) = t == t' 206 | 207 | instance Ord (Indexed a) where 208 | (Indexed t _) `compare` (Indexed t' _) = t `compare` t' 209 | 210 | instance Functor Indexed where 211 | f `fmap` (Indexed t x) = Indexed t (\y -> f (x y)) 212 | 213 | instance Applicative Indexed where 214 | pure x = Indexed GlobI (pure x) 215 | (Indexed t x) <*> (Indexed t' y) | t == t' = Indexed t (x <*> y) 216 | | otherwise = error "Cannot apply idxs of different type" 217 | 218 | instance Ord (Int -> Named a) where 219 | x `compare` y = EQ 220 | 221 | instance Eq (Int -> Named a) where 222 | x == y = True 223 | 224 | instance Show (Int -> Named a) where 225 | show _ = "..." 226 | 227 | data Named a 228 | = Named !String Index Ty a 229 | | Unnamed Index Ty a 230 | | Lazy !String Ty (Int -> (Named a)) 231 | deriving (Generic) 232 | 233 | deriving instance Eq (Named Value) 234 | deriving instance Ord (Named Value) 235 | deriving instance Show (Named Value) 236 | 237 | mkNamed :: HasCallStack => Ty -> String -> Value -> Symbol 238 | mkNamed t s v = Named s (Indexed GlobI (const undefined)) t (trace ("[mkNamed] accessing " ++ s ++ " value") v) 239 | 240 | mkUnnamed' :: HasCallStack => IndexType -> Ty -> Value -> Symbol 241 | mkUnnamed' it t v = Unnamed (Indexed it (const undefined)) t (trace ("[mkUnnamed] accessing unnamed value of type " ++ show t) v) 242 | 243 | mkUnnamed :: HasCallStack => Ty -> Value -> Symbol 244 | mkUnnamed t = mkUnnamed' GlobI t 245 | 246 | mkUnnamedInst :: HasCallStack => Ty -> Value -> Symbol 247 | mkUnnamedInst t = mkUnnamed' InstI t 248 | 249 | instance Functor Named where 250 | fmap f (Named n i t x) = Named n i t (f (trace "[fmap] symbol value" x)) 251 | fmap f (Unnamed i t x) = Unnamed i t (f (trace "[fmap] symbol value" x)) 252 | fmap f (Lazy n t v) = Lazy n t (\x -> fmap f (v x)) 253 | 254 | 255 | type Symbol = Named Value 256 | 257 | instance HasLinkage a => HasLinkage (Named a) where 258 | getLinkage (Named _ _ _ x) = getLinkage x 259 | getLinkage (Unnamed _ _ x) = getLinkage x 260 | setLinkage l = fmap (setLinkage l) 261 | 262 | symbolValue :: Symbol -> Value 263 | symbolValue (Named n _ _ v) = trace ("[symbolValue] for symbol " ++ n) v 264 | symbolValue (Unnamed _ _ v) = trace ("[symbolValue] for symbol") v 265 | symbolValue (Lazy n _ v) = symbolValue (v 0) 266 | 267 | symbolName :: Symbol -> Maybe String 268 | symbolName (Named s _ _ _) = Just s 269 | symbolName (Unnamed _ _ _) = Nothing 270 | symbolName (Lazy s _ _) = Just s 271 | 272 | symbolType :: Symbol -> Ty 273 | symbolType (Named _ _ t _) = t 274 | symbolType (Unnamed _ t _) = t 275 | symbolType (Lazy _ t _) = t 276 | 277 | symbolIndex :: Symbol -> Index 278 | symbolIndex (Named _ i _ _) = i 279 | symbolIndex (Unnamed i _ _) = i 280 | symbolIndex (Lazy _ _ s) = symbolIndex (s 0) 281 | 282 | symbolIndexValue :: HasCallStack => Symbol -> Word64 283 | symbolIndexValue s = let (Indexed _ i) = symbolIndex s in i 0 284 | 285 | symbolIndexType :: HasCallStack => Symbol -> IndexType 286 | symbolIndexType s = let (Indexed t _) = symbolIndex s in t 287 | 288 | withIndex' :: IndexType -> (Int -> Word64) -> Symbol -> Symbol 289 | withIndex' t i (Named s _ t' v) = (Named s (Indexed t i) t' (trace ("evaluating indexed " ++ s) v)) 290 | withIndex' t i (Unnamed _ t' v) = (Unnamed (Indexed t i) t' (trace "evaluating indexed unnamed" v)) 291 | withIndex' _ _ (Lazy _ _ _) = error "Can not set value on lazy!" 292 | 293 | withIndex, withArgIndex, withInstIndex :: (Int -> Word64) -> Symbol -> Symbol 294 | withIndex i = withIndex' GlobI i 295 | withArgIndex i = withIndex' ArgI i 296 | withInstIndex i = withIndex' InstI i 297 | 298 | type ValueSymbolTable = [(Int,ValueSymbolEntry)] 299 | 300 | data ValueSymbolEntry 301 | = Entry !String -- ^ value id, string 302 | | FnEntry !Int !String -- ^ value id, offset, string 303 | deriving Show 304 | 305 | entryName :: ValueSymbolEntry -> String 306 | entryName (Entry s) = s 307 | entryName (FnEntry _ s) = s 308 | 309 | -------------------------------------------------------------------------------- /src/Data/BitCode/LLVM/Visibility.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | module Data.BitCode.LLVM.Visibility where 3 | 4 | import GHC.Generics (Generic) 5 | import Data.Binary (Binary) 6 | 7 | -- see @include/llvm/IR/GlobalValue.h@ 8 | -- | An enumeration for the kinds of visibility of global values. 9 | data Visibility 10 | -- | The GV is visible 11 | = Default -- 0 12 | -- | The GV is hidden 13 | | Hidden -- 1 14 | -- | The GV is protected 15 | | Protected -- 2 16 | deriving (Eq, Enum, Ord, Show, Generic) 17 | 18 | instance Binary Visibility 19 | 20 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: nightly-2016-08-06 2 | packages: 3 | - '.' 4 | - '../data-bitcode' 5 | 6 | system-ghc: true 7 | compiler: ghc-8.0.1 8 | -------------------------------------------------------------------------------- /test/LLVMSpec.hs: -------------------------------------------------------------------------------- 1 | module LLVMSpec where 2 | 3 | import Test.Tasty.Hspec 4 | 5 | import Data.BitCode as BC 6 | import Data.BitCode.Reader as BC 7 | import Data.BitCode.LLVM.FromBitCode as BC 8 | import Data.BitCode.LLVM as LLVM 9 | import Data.BitCode.LLVM.Reader.Monad as LLVM 10 | import Data.BitCode.LLVM.Pretty 11 | import qualified Data.BitCode.LLVM.Instruction as I 12 | import qualified Data.BitCode.LLVM.Function as F 13 | import qualified Data.BitCode.Writer.Monad as BCM (writeFile) 14 | import Data.BitCode.Writer (emitTopLevel) 15 | import Data.Maybe (catMaybes) 16 | import Data.BitCode.LLVM.ToBitCode (toBitCode) 17 | import Data.BitCode.Writer.Monad (withHeader) 18 | 19 | import System.Process (readProcessWithExitCode) 20 | import System.Exit (ExitCode(ExitSuccess)) 21 | import System.FilePath ((-<.>), ()) 22 | import Data.Either (isRight) 23 | import GHC.Stack (HasCallStack) 24 | 25 | writeFile' :: HasCallStack => FilePath -> [BitCode] -> IO () 26 | writeFile' fp = BCM.writeFile fp . withHeader True . emitTopLevel 27 | 28 | compile :: HasCallStack => FilePath -> IO FilePath 29 | compile f = do 30 | (exit, _out, _err) <- readProcessWithExitCode 31 | "clang" 32 | [ "-w" -- no warnings 33 | , "-emit-llvm" 34 | , "-c" 35 | , f 36 | , "-o" 37 | , fout ] 38 | "" --stdin 39 | case exit of 40 | ExitSuccess -> return fout 41 | err -> error $ show err 42 | where 43 | fout = f -<.> "bc" 44 | 45 | decompile :: HasCallStack => FilePath -> IO FilePath 46 | decompile f = do 47 | (exit, _out, _err) <- readProcessWithExitCode 48 | "llvm-dis" 49 | [ "-o" 50 | , fout 51 | , f ] 52 | "" --stdin 53 | case exit of 54 | ExitSuccess -> return fout 55 | err -> error $ show err 56 | where 57 | fout = f -<.> "dis" 58 | 59 | readBitcode :: HasCallStack => FilePath -> IO (Either String (Maybe Ident, Module)) 60 | readBitcode f = do 61 | res <- BC.readFile f 62 | return $ (evalLLVMReader . parseTopLevel . catMaybes . map normalize) =<< res 63 | 64 | dumpBitcode :: HasCallStack => FilePath -> IO () 65 | dumpBitcode f = do 66 | (Right (_, mod)) <- readBitcode f 67 | putStrLn . show . pretty $ mod 68 | 69 | moduleInstructions :: HasCallStack => Module -> [I.Inst] 70 | moduleInstructions m = 71 | concatMap funcInsts (LLVM.mFns m) 72 | where 73 | funcInsts = concatMap blockInsts . F.dBody 74 | blockInsts :: F.BasicBlock -> [I.Inst] 75 | blockInsts (F.BasicBlock insts) = map snd insts 76 | blockInsts (F.NamedBlock _ insts) = map snd insts 77 | 78 | -- Note: we often do not try to "write", as building 79 | -- up modules by hand is rather hard. However 80 | -- building modules is usually done with the 81 | -- LLVM EDSL, and as such tests for writing modules 82 | -- should be done there. 83 | 84 | isModule :: HasCallStack => Either String (Maybe Ident, Module) -> Bool 85 | isModule = isRight 86 | 87 | 88 | isCmpXchg, isFence, isAtomicRMW, isAtomicLoad, isAtomicStore, isSwitch, isExtractValue 89 | :: HasCallStack => I.Inst -> Bool 90 | 91 | isCmpXchg (I.CmpXchg{}) = True 92 | isCmpXchg _ = False 93 | 94 | isFence (I.Fence{}) = True 95 | isFence _ = False 96 | 97 | isAtomicRMW (I.AtomicRMW{}) = True 98 | isAtomicRMW _ = False 99 | 100 | isAtomicLoad (I.AtomicLoad{}) = True 101 | isAtomicLoad _ = False 102 | 103 | isAtomicStore (I.AtomicStore{}) = True 104 | isAtomicStore _ = False 105 | 106 | isSwitch (I.Switch{}) = True 107 | isSwitch _ = False 108 | 109 | isExtractValue (I.ExtractValue{}) = True 110 | isExtractValue _ = False 111 | 112 | 113 | compileModule :: HasCallStack => FilePath -> IO (FilePath, (Maybe Ident, Module)) 114 | compileModule fname = do 115 | bcfile <- compile $ "test/fromBitcode" fname 116 | ret <- readBitcode bcfile 117 | ret `shouldSatisfy` isModule 118 | let Right mod = ret 119 | return (bcfile, mod) 120 | 121 | roundtripModule :: HasCallStack => FilePath -> IO [String] 122 | roundtripModule fname = do 123 | (bcfile, mod) <- compileModule fname 124 | -- write the module back into the same file 125 | writeFile' bcfile . map denormalize $ toBitCode mod 126 | -- try to read it again 127 | ret <- readBitcode bcfile 128 | ret `shouldSatisfy` isModule 129 | -- make sure llvm doesn't throw up trying to decompile it 130 | decompile bcfile `shouldReturn` (bcfile -<.> "dis") 131 | lines <$> Prelude.readFile (bcfile -<.> "dis") 132 | 133 | spec_llvm :: Spec 134 | spec_llvm = do 135 | describe "fromBitcode" $ do 136 | it "should be able to read CMPXCHG" $ do 137 | bcfile <- compile "test/fromBitcode/cmpxchg.ll" 138 | ret <- readBitcode bcfile 139 | ret `shouldSatisfy` isModule 140 | let Right (_mbIdent, mod) = ret 141 | moduleInstructions mod `shouldSatisfy` (any isCmpXchg) 142 | 143 | it "should be able to roundtrip CMPXCHG" $ do 144 | bcfile <- compile "test/fromBitcode/cmpxchg.ll" 145 | ret <- readBitcode bcfile 146 | ret `shouldSatisfy` isModule 147 | let Right mod = ret 148 | writeFile' bcfile . map denormalize $ toBitCode mod 149 | ret <- readBitcode bcfile 150 | ret `shouldSatisfy` isModule 151 | decompile bcfile `shouldReturn` "test/fromBitcode/cmpxchg.dis" 152 | 153 | it "should be able to read FENCE" $ do 154 | bcfile <- compile "test/fromBitcode/fence.ll" 155 | ret <- readBitcode bcfile 156 | ret `shouldSatisfy` isModule 157 | let Right (_mbIdent, mod) = ret 158 | moduleInstructions mod `shouldSatisfy` (any isFence) 159 | 160 | it "should be able to roundtrip FENCE" $ do 161 | bcfile <- compile "test/fromBitcode/fence.ll" 162 | ret <- readBitcode bcfile 163 | ret `shouldSatisfy` isModule 164 | let Right mod = ret 165 | writeFile' bcfile . map denormalize $ toBitCode mod 166 | ret <- readBitcode bcfile 167 | ret `shouldSatisfy` isModule 168 | decompile bcfile `shouldReturn` "test/fromBitcode/fence.dis" 169 | 170 | it "should be able to read ATOMIC RMW" $ do 171 | bcfile <- compile "test/fromBitcode/atomicrmw.ll" 172 | ret <- readBitcode bcfile 173 | ret `shouldSatisfy` isModule 174 | let Right (_mbIdent, mod) = ret 175 | moduleInstructions mod `shouldSatisfy` (any isAtomicRMW) 176 | 177 | it "should be able to roundtrip ATOMIC RMW" $ do 178 | bcfile <- compile "test/fromBitcode/atomicrmw.ll" 179 | ret <- readBitcode bcfile 180 | ret `shouldSatisfy` isModule 181 | let Right mod = ret 182 | writeFile' bcfile . map denormalize $ toBitCode mod 183 | ret <- readBitcode bcfile 184 | ret `shouldSatisfy` isModule 185 | decompile bcfile `shouldReturn` "test/fromBitcode/atomicrmw.dis" 186 | 187 | it "should be able to read LOAD ATOMIC" $ do 188 | bcfile <- compile "test/fromBitcode/atomicload.ll" 189 | ret <- readBitcode bcfile 190 | ret `shouldSatisfy` isModule 191 | let Right (_mbIdent, mod) = ret 192 | moduleInstructions mod `shouldSatisfy` (any isAtomicLoad) 193 | 194 | it "should be able to roundtrip LOAD ATOMIC" $ do 195 | bcfile <- compile "test/fromBitcode/atomicload.ll" 196 | ret <- readBitcode bcfile 197 | ret `shouldSatisfy` isModule 198 | let Right mod = ret 199 | writeFile' bcfile . map denormalize $ toBitCode mod 200 | ret <- readBitcode bcfile 201 | ret `shouldSatisfy` isModule 202 | decompile bcfile `shouldReturn` "test/fromBitcode/atomicload.dis" 203 | 204 | it "should be able to read STORE ATOMIC" $ do 205 | bcfile <- compile "test/fromBitcode/atomicstore.ll" 206 | ret <- readBitcode bcfile 207 | ret `shouldSatisfy` isModule 208 | let Right (_mbIdent, mod) = ret 209 | moduleInstructions mod `shouldSatisfy` (any isAtomicStore) 210 | 211 | it "should be able to roundtrip STORE ATOMIC" $ do 212 | bcfile <- compile "test/fromBitcode/atomicstore.ll" 213 | ret <- readBitcode bcfile 214 | ret `shouldSatisfy` isModule 215 | let Right mod = ret 216 | writeFile' bcfile . map denormalize $ toBitCode mod 217 | ret <- readBitcode bcfile 218 | ret `shouldSatisfy` isModule 219 | decompile bcfile `shouldReturn` "test/fromBitcode/atomicstore.dis" 220 | 221 | it "should be able to read SWITCH" $ do 222 | bcfile <- compile "test/fromBitcode/switch.ll" 223 | ret <- readBitcode bcfile 224 | ret `shouldSatisfy` isModule 225 | let Right (_mbIdent, mod) = ret 226 | moduleInstructions mod `shouldSatisfy` (any isSwitch) 227 | 228 | it "should be able to roundtrip SWITCH" $ do 229 | bcfile <- compile "test/fromBitcode/switch.ll" 230 | ret <- readBitcode bcfile 231 | ret `shouldSatisfy` isModule 232 | let Right mod = ret 233 | writeFile' bcfile . map denormalize $ toBitCode mod 234 | ret <- readBitcode bcfile 235 | ret `shouldSatisfy` isModule 236 | decompile bcfile `shouldReturn` "test/fromBitcode/switch.dis" 237 | 238 | it "should be able to read EXTRACT VALUE" $ do 239 | (bcfile, (_mbIdent, mod)) <- compileModule "extractvalue.ll" 240 | moduleInstructions mod `shouldSatisfy` (any isExtractValue) 241 | 242 | it "should be able to roundtrip EXTRACT VALUE" $ do 243 | _ <- roundtripModule "extractvalue.ll" 244 | return () 245 | -------------------------------------------------------------------------------- /test/Tasty.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF tasty-discover -optF --tree-display #-} 2 | 3 | -------------------------------------------------------------------------------- /test/fromBitcode/atomicload.ll: -------------------------------------------------------------------------------- 1 | define i32 @main() { 2 | %ptr = alloca i32 3 | store i32 0, i32* %ptr 4 | %val = load atomic i32, i32* %ptr seq_cst, align 4 5 | ret i32 %val 6 | } 7 | -------------------------------------------------------------------------------- /test/fromBitcode/atomicrmw.ll: -------------------------------------------------------------------------------- 1 | define i32 @main() { 2 | %ptr = alloca i32 3 | store i32 0, i32* %ptr 4 | 5 | %old = atomicrmw add i32* %ptr, i32 1 acquire 6 | 7 | ret i32 %old 8 | } 9 | -------------------------------------------------------------------------------- /test/fromBitcode/atomicstore.ll: -------------------------------------------------------------------------------- 1 | define i32 @main() { 2 | %ptr = alloca i32 3 | store atomic i32 0, i32* %ptr seq_cst, align 8 4 | %val = load i32, i32* %ptr 5 | ret i32 %val 6 | } 7 | -------------------------------------------------------------------------------- /test/fromBitcode/cmpxchg.ll: -------------------------------------------------------------------------------- 1 | 2 | define i32 @main() { 3 | %ptr = alloca i32 4 | %ptr2 = alloca i32 5 | store i32 1, i32* %ptr 6 | store i32 2, i32* %ptr2 7 | %val = load i32, i32* %ptr 8 | %val2 = load i32, i32* %ptr2 9 | %squared = mul i32 %val2, %val2 10 | 11 | cmpxchg i32* %ptr, i32 %val, i32 %squared acq_rel monotonic 12 | 13 | %ret = load i32, i32* %ptr 14 | ret i32 %ret 15 | } 16 | -------------------------------------------------------------------------------- /test/fromBitcode/extractvalue.ll: -------------------------------------------------------------------------------- 1 | 2 | define i32 @main() { 3 | %ptr = alloca { i32, i1 } 4 | store { i32, i1 } { i32 1, i1 1 }, { i32, i1 }* %ptr 5 | %val = load { i32, i1 }, { i32, i1 }* %ptr 6 | 7 | %ret = extractvalue { i32, i1 } %val, 0 8 | 9 | ret i32 %ret 10 | } 11 | -------------------------------------------------------------------------------- /test/fromBitcode/fence.ll: -------------------------------------------------------------------------------- 1 | define i32 @main() { 2 | fence acquire 3 | fence syncscope("singlethread") seq_cst 4 | ret i32 0 5 | } 6 | -------------------------------------------------------------------------------- /test/fromBitcode/memset.ll: -------------------------------------------------------------------------------- 1 | 2 | declare void @llvm.memset.i32(i8*, i8, i32, i32) 3 | ; declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32) 4 | 5 | define i32 @main() { 6 | %ptr = alloca i8, i8 16, align 4 7 | call void @llvm.memset.i32(i8* %ptr, i8 0, i32 16, i32 4) 8 | ret i32 0 9 | } 10 | -------------------------------------------------------------------------------- /test/fromBitcode/switch.ll: -------------------------------------------------------------------------------- 1 | define i32 @main() { 2 | entry: 3 | %ptr = alloca i32 4 | store i32 0, i32* %ptr 5 | %val = load i32, i32* %ptr 6 | 7 | switch i32 %val, label %otherwise [ i32 0, label %onzero 8 | i32 1, label %onone 9 | i32 2, label %ontwo ] 10 | 11 | 12 | otherwise: 13 | ret i32 -1 14 | onzero: 15 | ret i32 0 16 | onone: 17 | ret i32 1 18 | ontwo: 19 | ret i32 2 20 | } 21 | --------------------------------------------------------------------------------