├── .editorconfig ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── Setup.hs ├── imp.cabal ├── lib ├── IMP │ ├── AST.hs │ ├── Codegen │ │ ├── Error.hs │ │ ├── GlobalCodegen.hs │ │ ├── SubCodegen.hs │ │ └── Utils.hs │ ├── Emit.hs │ ├── Parser.hs │ ├── Parser │ │ └── Error.hs │ ├── SourceLoc.hs │ ├── SymbolTable.hs │ └── Types.hs └── Test │ └── Tasty │ └── IMP.hs ├── package.yaml ├── src └── Main.hs ├── stack.yaml ├── stdlib ├── impstd.c └── impstd.h └── test ├── CompilerTests.hs ├── IMP └── Test │ ├── Parser.hs │ └── Tests.hs ├── TestSuite.hs └── examples ├── fibi.imp ├── fibi.stdin ├── fibi.stdout ├── fibi_no_rec.imp ├── fibi_no_rec.stdin ├── fibi_no_rec.stdout ├── gcd.imp ├── gcd.stdin ├── gcd.stdout ├── global.imp ├── global.stdin ├── global.stdout ├── hello.imp ├── hello.stdout ├── integer_lit_overflow.imp ├── simple.imp ├── simple.stdout ├── sum.imp ├── sum.stdin ├── sum.stdout ├── test_inout.imp ├── test_inout.stdout ├── test_sub_input.imp ├── test_sub_input.stdin └── test_sub_input.stdout /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | trim_trailing_whitespace = true 7 | charset = utf-8 8 | 9 | [*.hs] 10 | indent_style = space 11 | indent_size = 2 12 | 13 | [{*.yml,*.yaml}] 14 | indent_style = space 15 | indent_size = 2 16 | 17 | [*.[ch]] 18 | indent_style = space 19 | indent_size = 2 20 | 21 | [*.imp] 22 | indent_style = space 23 | indent_size = 2 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .stack-work 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | language: c 3 | 4 | cache: 5 | directories: 6 | - $HOME/.stack/ 7 | 8 | addons: 9 | apt: 10 | packages: 11 | - libgmp-dev 12 | - llvm-9-dev 13 | - clang-9 14 | 15 | before_install: 16 | - mkdir -p ~/.local/bin 17 | - export PATH=$HOME/.local/bin:$PATH 18 | - travis_retry curl -L https://www.stackage.org/stack/linux-x86_64 | tar xz --wildcards --strip-components=1 -C ~/.local/bin '*/stack' 19 | 20 | install: 21 | - stack --no-terminal --install-ghc test --only-dependencies 22 | 23 | script: 24 | - stack --no-terminal test --haddock --no-haddock-deps 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copirigth (c) 2017, Ievgenii Meshcheriakov 2 | 3 | Based on Haskell LLVM Tutorial by Stephen Diehl 4 | 5 | Copyright (c) 2013-2016, Stephen Diehl 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to 9 | deal in the Software without restriction, including without limitation the 10 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 11 | sell copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23 | IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | IMP Compiler Implemented in Haskell 2 | =================================== 3 | 4 | [![Build Status](https://travis-ci.org/eugmes/imp.svg?branch=master)](https://travis-ci.org/eugmes/imp) 5 | 6 | This is an implementation of IMP compiler in Haskell programming 7 | language. It uses [LLVM](http://llvm.org) as compiler backend. 8 | [Megaparsec](https://hackage.haskell.org/package/megaparsec) is used 9 | for parsing. 10 | 11 | The programming language is specified in this document 12 | . 13 | 14 | This work is inspired by [Hakell LLVM Tutorial](http://www.stephendiehl.com/llvm/) 15 | by Stephen Diehl. 16 | 17 | Building 18 | -------- 19 | 20 | It is recommended to use [stack](https://docs.haskellstack.org/en/stable/README/) 21 | to build the program: 22 | 23 | ``` 24 | % stack build 25 | ``` 26 | 27 | After this IMP programs can be compiled by running: 28 | 29 | ``` 30 | % stack exec -- impc test/examples/hello.imp 31 | % ./hello 32 | Hello World! 33 | ``` 34 | 35 | This runs `cc`. To specify a different compiler, use `--cc` command line option 36 | or `CC` environment variable. 37 | 38 | Extensions 39 | ---------- 40 | 41 | The language has several extensions compared to the original specification. 42 | All of the extensions are taken from [Ada](http://www.ada-auth.org/standards/12rm/html/RM-TOC.html): 43 | 44 | - String literals can have embedded quotation marks by repeating them twice. 45 | 46 | - If expressions can have additional `elsif` parts. 47 | 48 | - Function and procedure arguments can have `in`, `out`, and `in out` modes. 49 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /imp.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 1.12 2 | 3 | -- This file has been generated from package.yaml by hpack version 0.33.0. 4 | -- 5 | -- see: https://github.com/sol/hpack 6 | -- 7 | -- hash: 44d6b791288837604c1803be49bdd33320fcf764b6ef7199efa5034dba5b31b7 8 | 9 | name: imp 10 | version: 0.1.0.0 11 | synopsis: Compiler for IMP programming language 12 | description: This is a compiler for IMP programming language implemented in Haskell. It uses LLVM as compiler backend. 13 | category: Compiler 14 | homepage: https://github.com/eugmes/imp#readme 15 | bug-reports: https://github.com/eugmes/imp/issues 16 | author: Ievgenii Meshcheriakov 17 | maintainer: Ievgenii Meshcheriakov 18 | copyright: 2017-2018 Ievgenii Meshcheriakov 19 | license: MIT 20 | license-file: LICENSE 21 | build-type: Simple 22 | data-files: 23 | stdlib/impstd.c 24 | stdlib/impstd.h 25 | 26 | source-repository head 27 | type: git 28 | location: https://github.com/eugmes/imp 29 | 30 | library 31 | exposed-modules: 32 | IMP.AST 33 | IMP.Codegen.Error 34 | IMP.Codegen.GlobalCodegen 35 | IMP.Codegen.SubCodegen 36 | IMP.Codegen.Utils 37 | IMP.Emit 38 | IMP.Parser 39 | IMP.Parser.Error 40 | IMP.SourceLoc 41 | IMP.SymbolTable 42 | IMP.Types 43 | Test.Tasty.IMP 44 | other-modules: 45 | Paths_imp 46 | hs-source-dirs: 47 | lib 48 | ghc-options: -Wall -fno-warn-name-shadowing -Wcompat 49 | build-depends: 50 | base >=4.7 && <5 51 | , bytestring 52 | , composition-extra 53 | , containers 54 | , filepath 55 | , llvm-hs-pure ==9.* 56 | , megaparsec ==8.* 57 | , mtl 58 | , process-extras 59 | , stringbuilder 60 | , tasty 61 | , temporary 62 | , text 63 | , text-show 64 | , utf8-string 65 | default-language: Haskell2010 66 | 67 | executable impc 68 | main-is: Main.hs 69 | other-modules: 70 | Paths_imp 71 | hs-source-dirs: 72 | src 73 | ghc-options: -Wall -fno-warn-name-shadowing -Wcompat 74 | build-depends: 75 | base 76 | , bytestring 77 | , containers 78 | , filepath 79 | , imp 80 | , llvm-hs ==9.* 81 | , megaparsec ==8.* 82 | , optparse-applicative 83 | , process 84 | , temporary 85 | , text 86 | default-language: Haskell2010 87 | 88 | test-suite testsuite 89 | type: exitcode-stdio-1.0 90 | main-is: TestSuite.hs 91 | other-modules: 92 | CompilerTests 93 | IMP.Test.Parser 94 | IMP.Test.Tests 95 | Paths_imp 96 | hs-source-dirs: 97 | test 98 | ghc-options: -Wall -fno-warn-name-shadowing -Wcompat 99 | build-depends: 100 | base 101 | , imp 102 | , megaparsec ==8.* 103 | , tasty 104 | , tasty-hunit 105 | , text 106 | default-language: Haskell2010 107 | -------------------------------------------------------------------------------- /lib/IMP/AST.hs: -------------------------------------------------------------------------------- 1 | module IMP.AST where 2 | 3 | import IMP.SourceLoc 4 | 5 | import Data.List.NonEmpty (NonEmpty) 6 | import qualified Data.Text as T 7 | 8 | data Program = Program [Located VarDec] [Located Subroutine] deriving Show 9 | 10 | data VarDec = VarDec [Located ID] (Located Type) deriving Show 11 | 12 | newtype ID = ID {getID :: T.Text} deriving (Show, Eq, Ord) 13 | 14 | data Type = IntegerType | BooleanType | StringType deriving (Show, Eq, Ord) 15 | 16 | newtype Number = Number Integer deriving (Show, Eq, Ord) 17 | 18 | data Subroutine = Procedure (Located ID) [Located ParamList] [Located VarDec] Statements 19 | | Function (Located ID) [Located ParamList] (Located Type) [Located VarDec] Statements 20 | deriving Show 21 | 22 | data ParamList = ParamList [Located ID] Mode (Located Type) deriving Show 23 | 24 | data Mode = ModeIn 25 | | ModeOut 26 | | ModeInOut 27 | deriving (Eq, Ord, Show) 28 | 29 | data Statement = IfStatement (NonEmpty ConditionWithStatements) Statements 30 | | WhileStatement (Located Expression) Statements 31 | | AssignStatement (Located ID) (Located Expression) 32 | | CallStatement (Located ID) [Located Expression] 33 | | InputStatement (Located ID) 34 | | OutputStatement (Located Expression) 35 | | NullStatement 36 | | BreakStatement 37 | | ReturnStatement 38 | | ReturnValStatement (Located Expression) 39 | | HaltStatement 40 | | NewlineStatement 41 | deriving Show 42 | 43 | type Statements = [Located Statement] 44 | 45 | type ConditionWithStatements = (Located Expression, Statements) 46 | 47 | data Expression = UnOpExp UnaryOp (Located Expression) 48 | | BinOpExp (Located Expression) BinaryOp (Located Expression) 49 | | NumberExpression Number 50 | | BoolExpression Bool 51 | | IdExpression (Located ID) 52 | | CallExpression (Located ID) [Located Expression] 53 | | StringLiteralExpression (Located T.Text) 54 | deriving Show 55 | 56 | data UnaryOp = OpNot | OpNeg deriving (Show, Eq, Ord) 57 | 58 | data BinaryOp = OpEQ | OpLT | OpLE | OpGT | OpGE | OpNE 59 | | OpMul | OpDiv | OpMod | OpAnd 60 | | OpAdd | OpSub | OpOr deriving (Show, Eq, Ord) 61 | -------------------------------------------------------------------------------- /lib/IMP/Codegen/Error.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | 3 | module IMP.Codegen.Error 4 | ( CodegenError(..) 5 | , throwLocatedError 6 | , locatedErrorPretty 7 | ) where 8 | 9 | import IMP.AST 10 | import IMP.SourceLoc 11 | import Text.Megaparsec.Error -- TODO use custom class here 12 | import Text.Printf 13 | import Control.Monad.Except 14 | 15 | data CodegenError = InternalError String 16 | | TypeMismatch Type Type -- ^ Types don't match 17 | | InvalidNumberOfArguments -- ^ Invalid number of arguments 18 | | NonBooleanIfCondition -- ^ Boolean expression is expected as if statement condition 19 | | NonBooleanWhileCondition -- ^ Boolean expression is expected as while statement condition 20 | | NotAVariable ID -- ^ ... is not a variable 21 | | AttemptToCallAVariable -- ^ Attempt to call a variable 22 | | AttemptToCallAFunctionAsProcedure 23 | | AttemptToCallAProcedureAsFunction 24 | | InputError 25 | | BreakOutsideOfLoop 26 | | VoidReturnInFunction 27 | | NonVoidReturnInProcedure 28 | | UnaryOpTypeMismatch UnaryOp Type 29 | | BinaryOpTypeMismatch BinaryOp Type 30 | | AttemptToReadSubroutine 31 | | GlobalRedefinition ID 32 | | LocalRedefinition ID 33 | | SymbolNotInScope ID 34 | | MainIsAFunction 35 | | MainHasArguments 36 | | IntegerLiteralOutOfTypeRange Number 37 | | AssignmentToConstant 38 | | ConstantExpressionAsParameter Mode 39 | deriving (Eq, Ord) 40 | 41 | eShow :: CodegenError -> String 42 | eShow (InternalError msg) = printf "Internal error: %s" msg 43 | eShow (TypeMismatch leftType rightType) = printf "Types dont't match: '%s' vs '%s'." (show leftType) (show rightType) 44 | eShow InvalidNumberOfArguments = "Invalid number of arguments." 45 | eShow NonBooleanIfCondition = "Boolean expression is expected as if statement condition." 46 | eShow NonBooleanWhileCondition = "Boolean expression is expected as while statement condition." 47 | eShow (NotAVariable name) = printf "'%s' is not a variable." (getID name) 48 | eShow AttemptToCallAVariable = "Attempt to call a variable." 49 | eShow AttemptToCallAFunctionAsProcedure = "Attempt to call a function as procedure." 50 | eShow AttemptToCallAProcedureAsFunction = "Attempt to call a procedure as function." 51 | eShow InputError = "Attempt to input something other than integer." 52 | eShow BreakOutsideOfLoop = "'break' outside of a loop." 53 | eShow VoidReturnInFunction = "Function should return a value." 54 | eShow NonVoidReturnInProcedure = "Procedure cannot return a value." 55 | eShow (UnaryOpTypeMismatch unaryOp ty) = printf "Type mismatch for unary operator '%s': '%s'." (show unaryOp) (show ty) 56 | eShow (BinaryOpTypeMismatch binaryOp ty) = printf "Type mismatch for binary operator '%s': '%s'." (show binaryOp) (show ty) 57 | eShow AttemptToReadSubroutine = "Attempt to read value of subroutine." 58 | eShow (GlobalRedefinition name) = printf "Attempt to redefine global symbol '%s'." (getID name) 59 | eShow (LocalRedefinition name) = printf "Attempt to redefine local symbol '%s'." (getID name) 60 | eShow (SymbolNotInScope name) = printf "Symbol not in scope: '%s'." (getID name) 61 | eShow MainIsAFunction = "'main' should be a procedure." 62 | eShow MainHasArguments = "'main' should be a procedure with no arguments." 63 | eShow (IntegerLiteralOutOfTypeRange (Number n)) = printf "Integer literal is outside of allowed range: %d." n 64 | eShow AssignmentToConstant = printf "Cannot assign value to constant." 65 | eShow (ConstantExpressionAsParameter mode) = printf "Attempt to pass a constant expression as parameter with mode %s." (show mode) -- TODO: better wording 66 | 67 | instance ShowErrorComponent CodegenError where 68 | showErrorComponent = eShow 69 | 70 | throwLocatedError :: (WithLoc m, MonadError (Located e) m) => e -> m a 71 | throwLocatedError e = do 72 | loc <- currentLoc 73 | throwError $ Located loc e 74 | 75 | -- TODO remove this hack and create some custom class 76 | locatedErrorPretty :: ShowErrorComponent a => Located a -> String 77 | locatedErrorPretty e = showErrorComponent $ unLoc e 78 | -------------------------------------------------------------------------------- /lib/IMP/Codegen/GlobalCodegen.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE FlexibleContexts #-} 4 | 5 | module IMP.Codegen.GlobalCodegen 6 | ( MonadCodegen(..) 7 | , GlobalCodegen 8 | , CodegenOptions(..) 9 | , getSymtab 10 | , execGlobalCodegen 11 | , defineVar 12 | , declareProc 13 | , declareFun 14 | , defineSub 15 | ) where 16 | 17 | import Paths_imp 18 | import IMP.Codegen.Utils 19 | 20 | import qualified IMP.AST as I 21 | import IMP.AST (getID) 22 | import qualified IMP.SymbolTable as Tab 23 | import IMP.SourceLoc 24 | import IMP.Codegen.Error 25 | import IMP.Types 26 | import qualified LLVM.AST as AST 27 | import LLVM.AST hiding (type', functionAttributes, metadata, mkName) 28 | import LLVM.AST.Type hiding (void) 29 | import LLVM.AST.Global hiding (metadata) 30 | import LLVM.AST.Constant hiding (type') 31 | import qualified LLVM.AST.Constant as C 32 | import LLVM.AST.Linkage 33 | import qualified Data.Set as Set 34 | import LLVM.AST.DataLayout 35 | import LLVM.Prelude (ShortByteString) 36 | import Control.Monad.State 37 | import Control.Monad.Except 38 | import Data.String 39 | import qualified Data.Text as T 40 | import qualified Data.Text.Encoding as TE 41 | import Data.Version 42 | import qualified Data.ByteString.UTF8 as U8 43 | import qualified Data.ByteString as B 44 | import TextShow (showt) 45 | 46 | class (MonadError (Located CodegenError) m, WithLoc m) => MonadCodegen m where 47 | emitString :: T.Text -> m Operand 48 | useStdCall :: StandardCall -> m () 49 | 50 | data CodegenOptions = CodegenOptions 51 | { sourceFileName :: FilePath 52 | , dataLayout :: DataLayout 53 | , targetTriple :: ShortByteString 54 | } deriving Show 55 | 56 | data CodegenState = CodegenState 57 | { currentModule :: AST.Module 58 | , symtab :: SymbolTable 59 | , nextStringNum :: Word 60 | , usedCalls :: Set.Set StandardCall 61 | , nextMetadataNum :: Word 62 | , location :: SourcePos 63 | } deriving Show 64 | 65 | newtype GlobalCodegen a = GlobalCodegen 66 | { runGlobalCodegen :: StateT CodegenState (Except (Located CodegenError)) a 67 | } deriving ( Functor 68 | , Applicative 69 | , Monad 70 | , MonadFix 71 | , MonadState CodegenState 72 | , MonadError (Located CodegenError)) 73 | 74 | instance WithLoc GlobalCodegen where 75 | withNewLoc pos action = do 76 | oldPos <- gets location 77 | modify (\s -> s {location = pos}) 78 | result <- action 79 | modify (\s -> s {location = oldPos}) 80 | return result 81 | 82 | currentLoc = gets location 83 | 84 | instance MonadCodegen GlobalCodegen where 85 | emitString = globalEmitString 86 | useStdCall = globalUseStdCall 87 | 88 | execGlobalCodegen :: CodegenOptions -> GlobalCodegen a -> Either (Located CodegenError) AST.Module 89 | execGlobalCodegen opts m = 90 | fmap currentModule $ runExcept $ execStateT (runGlobalCodegen m') s 91 | where 92 | m' = m >> emitCompilerInfo 93 | md = emptyModule opts 94 | s = CodegenState md Tab.empty 0 Set.empty 0 (initialPos $ sourceFileName opts) 95 | 96 | emptyModule :: CodegenOptions -> AST.Module 97 | emptyModule opts = 98 | defaultModule 99 | { moduleName = fromString $ sourceFileName opts 100 | , moduleSourceFileName = fromString $ sourceFileName opts 101 | , moduleDataLayout = Just $ dataLayout opts 102 | , moduleTargetTriple = Just $ targetTriple opts 103 | } 104 | 105 | getSymtab :: GlobalCodegen SymbolTable 106 | getSymtab = gets symtab 107 | 108 | -- | Add symbol to the global symbol table 109 | -- 110 | -- TODO Insert location information into symbol table 111 | addSym :: SymbolType -> Type -> I.ID -> GlobalCodegen () 112 | addSym st lt n = do 113 | syms <- gets symtab 114 | let sym = (st, ConstantOperand $ GlobalReference lt (mkName $ getID n)) 115 | case Tab.insert n sym syms of 116 | Left _ -> throwLocatedError $ GlobalRedefinition n 117 | Right syms' -> modify $ \s -> s { symtab = syms' } 118 | 119 | -- | Add global definition 120 | addDefn :: Definition -> GlobalCodegen () 121 | addDefn d = do 122 | m <- gets currentModule 123 | 124 | let defs = moduleDefinitions m 125 | modify $ \s -> s { currentModule = m { moduleDefinitions = defs ++ [d] }} 126 | 127 | -- | Convert subprogram prameter type to LLVM type 128 | -- 129 | -- Currently all parameters are passed by value 130 | -- 131 | paramTypeToLLVM :: Argument -> Type 132 | paramTypeToLLVM (argty, _) = typeToLLVM argty 133 | 134 | outArgTypes :: [Argument] -> [Type] 135 | outArgTypes = map paramTypeToLLVM . filter (argReturned . argumentHandling) 136 | 137 | filterInArgs :: [Argument] -> [Argument] 138 | filterInArgs = filter (argInitialized . argumentHandling) 139 | 140 | functionReturnType :: I.Type -> [Argument] -> Type 141 | functionReturnType retty argtys = 142 | case typeToLLVM retty : outArgTypes argtys of 143 | [ty] -> ty 144 | tys -> StructureType False tys 145 | 146 | procedureReturnType :: [Argument] -> Type 147 | procedureReturnType argtys = 148 | case outArgTypes argtys of 149 | [] -> VoidType 150 | [ty] -> ty 151 | tys -> StructureType False tys 152 | 153 | -- | Declares function in global symbol table 154 | declareFun :: I.ID -> I.Type -> [Argument] -> GlobalCodegen () 155 | declareFun label retty argtys = addSym symt t label 156 | where 157 | symt = SymbolFunction retty argtys 158 | t = ptr $ FunctionType rt (map paramTypeToLLVM $ filterInArgs argtys) False 159 | rt = functionReturnType retty argtys 160 | 161 | -- | Declares procedure in global symbol table 162 | declareProc :: I.ID -> [Argument] -> GlobalCodegen () 163 | declareProc label argtys = addSym symt t label 164 | where 165 | symt = SymbolProcedure argtys 166 | t = ptr $ FunctionType (procedureReturnType argtys) (map paramTypeToLLVM $ filterInArgs argtys) False 167 | 168 | -- | Adds global subprogram definition 169 | defineSub :: I.ID -> Maybe I.Type -> [(I.Type, I.Mode, Located I.ID)] -> [BasicBlock] -> GlobalCodegen () 170 | defineSub label retty argtys body = addDefn def 171 | where 172 | argtys' = map (\(ty, mode, _id) -> (ty, mode)) argtys 173 | rt = case retty of 174 | Nothing -> procedureReturnType argtys' 175 | Just ty -> functionReturnType ty argtys' 176 | 177 | def = GlobalDefinition $ 178 | functionDefaults { name = (mkName . getID) label 179 | , parameters = ([Parameter (paramTypeToLLVM (ty, mode)) ((mkName . getID . unLoc) nm) [] 180 | | (ty, mode, nm) <- argtys, mode /= I.ModeOut], False) 181 | , returnType = rt 182 | , basicBlocks = body 183 | } 184 | 185 | -- | Add global variable definition 186 | -- 187 | -- Also adds this variable to symbol table 188 | defineVar :: I.Type -> IsConstant -> I.ID -> GlobalCodegen () 189 | defineVar ty isConst label = addSym (SymbolVariable ty isConst) (ptr t) label >> addDefn def 190 | where 191 | n = mkName $ getID label 192 | t = typeToLLVM ty 193 | def = GlobalDefinition $ globalVariableDefaults { name = n, type' = t, initializer = Just $ Undef t } 194 | 195 | newStringName :: GlobalCodegen Name 196 | newStringName = do 197 | n <- gets nextStringNum 198 | modify $ \s -> s { nextStringNum = n + 1 } 199 | return $ mkName $ ".str." <> showt n 200 | 201 | globalEmitString :: T.Text -> GlobalCodegen Operand 202 | globalEmitString s = do 203 | name <- newStringName 204 | let d = GlobalDefinition $ 205 | globalVariableDefaults { name = name 206 | , linkage = Private 207 | , unnamedAddr = Just GlobalAddr 208 | , isConstant = True 209 | , type' = ArrayType { nArrayElements = size 210 | , elementType = i8 } 211 | , initializer = Just ini } 212 | ty = ptr $ ArrayType (fromIntegral size) i8 213 | addr = GlobalReference ty name 214 | op = ConstantOperand $ C.GetElementPtr True addr [ C.Int (typeBits integer) 0 215 | , C.Int (typeBits integer) 0 ] 216 | addDefn d 217 | return op 218 | where 219 | content :: U8.ByteString 220 | content = TE.encodeUtf8 s 221 | vals = map (C.Int 8 . fromIntegral) (B.unpack content ++ [0]) 222 | size = fromIntegral $ length vals 223 | ini = C.Array i8 vals 224 | 225 | globalUseStdCall :: StandardCall -> GlobalCodegen () 226 | globalUseStdCall c = do 227 | used <- gets usedCalls 228 | unless (Set.member c used) $ do 229 | modify $ \s -> s { usedCalls = Set.insert c used } 230 | emitStdCallDecl c 231 | 232 | emitStdCallDecl :: StandardCall -> GlobalCodegen () 233 | emitStdCallDecl c = addDefn d 234 | where 235 | retty = stdCallType c 236 | n = stdCallName c 237 | args = stdCallArgs c 238 | attrs = Right <$> stdCallAttrs c 239 | d = GlobalDefinition $ 240 | functionDefaults { name = n 241 | , parameters = ([Parameter ty argName [] | (ty, argName) <- args], False) 242 | , returnType = retty 243 | , functionAttributes = attrs 244 | } 245 | 246 | namedMetadata :: ShortByteString -> [MetadataNodeID] -> GlobalCodegen () 247 | namedMetadata name ids = addDefn $ NamedMetadataDefinition name ids 248 | 249 | newMetadataNodeID :: GlobalCodegen MetadataNodeID 250 | newMetadataNodeID = do 251 | n <- gets nextMetadataNum 252 | modify $ \s -> s { nextMetadataNum = n + 1 } 253 | return $ MetadataNodeID n 254 | 255 | metadata :: MDNode -> GlobalCodegen MetadataNodeID 256 | metadata node = do 257 | nd <- newMetadataNodeID 258 | addDefn $ MetadataNodeDefinition nd node 259 | return nd 260 | 261 | emitCompilerInfo :: GlobalCodegen () 262 | emitCompilerInfo = do 263 | nd <- metadata $ MDTuple [Just $ MDString $ fromString $ "IMP version " ++ showVersion version] 264 | namedMetadata "llvm.ident" [nd] 265 | -------------------------------------------------------------------------------- /lib/IMP/Codegen/SubCodegen.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE LambdaCase #-} 4 | 5 | -- | Gode generation for subroutines. 6 | 7 | module IMP.Codegen.SubCodegen 8 | ( SubCodegen 9 | , getLoopExitBlock 10 | , execSubCodegen 11 | , block 12 | , alloca 13 | , store 14 | , load 15 | , defineLocalVar 16 | , setExitBlock 17 | , setBlock 18 | , br 19 | , cbr 20 | , ret 21 | , unreachable 22 | , entry 23 | , callFunc 24 | , withLoopExit 25 | , getVar 26 | , apiFunCall 27 | , apiProcCall 28 | , exit 29 | , notInstr 30 | , instr 31 | ) where 32 | 33 | import IMP.Codegen.Utils 34 | 35 | import qualified IMP.AST as I 36 | import qualified IMP.SymbolTable as Tab 37 | import IMP.SourceLoc 38 | import IMP.Codegen.GlobalCodegen 39 | import IMP.Codegen.Error 40 | import IMP.Types 41 | import qualified LLVM.AST as AST 42 | import LLVM.AST hiding (functionAttributes, metadata, mkName) 43 | import qualified LLVM.AST.CallingConvention as CC 44 | import qualified Data.Map.Strict as Map 45 | import qualified LLVM.AST.FunctionAttribute as FA 46 | import Control.Monad.State 47 | import Control.Monad.Except 48 | import Data.List 49 | import Data.Function 50 | import qualified Data.Text as T 51 | import Text.Printf 52 | import TextShow (showt) 53 | 54 | type Names = Map.Map T.Text Int 55 | 56 | data CodegenState = CodegenState 57 | { currentBlock :: Name 58 | -- ^ Name of the active block to append to 59 | , exitBlock :: Maybe Name 60 | -- ^ Block containing return 61 | , subReturn :: Maybe (I.Type, Operand) 62 | -- ^ Subroutine return type and location 63 | , blocks :: Map.Map Name BlockState 64 | -- ^ Blocks of function 65 | , symtab :: SymbolTable 66 | -- ^ Function scope symbol table 67 | , blockCount :: Int 68 | -- ^ Count of basic blocks 69 | , count :: Word 70 | -- ^ Count of unnamed instructions 71 | , names :: Names 72 | -- ^ Name supply 73 | , location :: SourcePos 74 | , loopExitBlock :: Maybe Name 75 | -- ^ Exit block for innermost loop 76 | } deriving Show 77 | 78 | data BlockState = BlockState 79 | { idx :: Int 80 | -- ^ Block index 81 | , stack :: [Named Instruction] 82 | -- ^ Stack of unstructions 83 | , term :: Maybe (Named Terminator) 84 | -- ^ Block terminator 85 | } deriving Show 86 | 87 | emptyBlock :: Int -> BlockState 88 | emptyBlock ix = BlockState ix [] Nothing 89 | 90 | newtype SubCodegen a = SubCodegen 91 | { runSubCodegen :: StateT CodegenState GlobalCodegen a 92 | } deriving ( Functor 93 | , Applicative 94 | , Monad 95 | , MonadFix 96 | , MonadError (Located CodegenError) 97 | , MonadState CodegenState 98 | ) 99 | 100 | instance WithLoc SubCodegen where 101 | withNewLoc pos action = do 102 | oldPos <- gets location 103 | modify (\s -> s { location = pos }) 104 | result <- action 105 | modify (\s -> s { location = oldPos }) 106 | return result 107 | 108 | currentLoc = gets location 109 | 110 | -- TODO ensure location is set correctly 111 | liftG :: GlobalCodegen a -> SubCodegen a 112 | liftG = SubCodegen . lift 113 | 114 | instance MonadCodegen SubCodegen where 115 | emitString = liftG . emitString 116 | useStdCall = liftG . useStdCall 117 | 118 | sortBlocks :: [(Name, BlockState)] -> [(Name, BlockState)] 119 | sortBlocks = sortBy (compare `on` (idx . snd)) 120 | 121 | createBlocks :: MonadCodegen m => CodegenState -> m [BasicBlock] 122 | createBlocks = traverse makeBlock . sortBlocks . Map.toList . blocks 123 | 124 | makeBlock :: MonadCodegen m => (Name, BlockState) -> m BasicBlock 125 | makeBlock (l, BlockState _ s t) = 126 | case t of 127 | Just term -> return $ BasicBlock l (reverse s) term 128 | Nothing -> throwLocatedError $ InternalError $ printf "Block has no terminator: '%s'." (show l) 129 | 130 | newCodegenState :: GlobalCodegen CodegenState 131 | newCodegenState = do 132 | syms <- getSymtab 133 | pos <- currentLoc 134 | 135 | return CodegenState 136 | { currentBlock = mkName "" 137 | , exitBlock = Nothing 138 | , subReturn = Nothing 139 | , blocks = Map.empty 140 | , symtab = Tab.newScope syms 141 | , blockCount = 1 142 | , count = 0 143 | , names = Map.empty 144 | , location = pos 145 | , loopExitBlock = Nothing 146 | } 147 | 148 | execSubCodegen :: SubCodegen a -> GlobalCodegen [BasicBlock] 149 | execSubCodegen m = newCodegenState >>= execStateT (runSubCodegen m) >>= createBlocks 150 | 151 | withLoopExit :: Name -> SubCodegen a -> SubCodegen a 152 | withLoopExit newExitB action = do 153 | oldExitB <- gets loopExitBlock 154 | modify (\s -> s { loopExitBlock = Just newExitB }) 155 | result <- action 156 | modify (\s -> s { loopExitBlock = oldExitB }) 157 | return result 158 | 159 | entry :: SubCodegen Name 160 | entry = gets currentBlock 161 | 162 | exit :: SubCodegen (Name, Maybe (I.Type, Operand)) 163 | exit = 164 | gets exitBlock >>= \case 165 | Just bname -> do 166 | t <- gets subReturn 167 | return (bname, t) 168 | Nothing -> throwLocatedError $ InternalError "Exit block was not set." 169 | 170 | block :: T.Text -> SubCodegen Name 171 | block bname = do 172 | bls <- gets blocks 173 | ix <- gets blockCount 174 | nms <- gets names 175 | 176 | let new = emptyBlock ix 177 | (qname, supply) = uniqueName bname nms 178 | newName = mkName qname 179 | 180 | modify $ \s -> s { blocks = Map.insert newName new bls 181 | , blockCount = ix + 1 182 | , names = supply 183 | , currentBlock = newName 184 | } 185 | return newName 186 | 187 | setBlock :: Name -> SubCodegen () 188 | setBlock bname = modify $ \s -> s { currentBlock = bname } 189 | 190 | setExitBlock :: Maybe (I.Type, Operand) -> Name -> SubCodegen () 191 | setExitBlock ty bname = 192 | modify $ \s -> s { subReturn = ty, exitBlock = Just bname } 193 | 194 | modifyBlock :: BlockState -> SubCodegen () 195 | modifyBlock new = do 196 | active <- gets currentBlock 197 | modify $ \s -> s { blocks = Map.insert active new (blocks s) } 198 | 199 | current :: SubCodegen BlockState 200 | current = do 201 | c <- gets currentBlock 202 | blks <- gets blocks 203 | case Map.lookup c blks of 204 | Just x -> return x 205 | Nothing -> throwLocatedError $ InternalError $ printf "No such block: '%s'" (show c) 206 | 207 | fresh :: SubCodegen Word 208 | fresh = do 209 | i <- gets count 210 | modify $ \s -> s { count = i + 1 } 211 | return $ i + 1 212 | 213 | uniqueName :: T.Text -> Names -> (T.Text, Names) 214 | uniqueName nm ns = 215 | case Map.lookup nm ns of 216 | Nothing -> (nm, Map.insert nm 1 ns) 217 | Just ix -> (nm <> showt ix, Map.insert nm (ix + 1) ns) 218 | 219 | defineLocalVar :: I.ID -> I.Type -> IsConstant -> Operand -> SubCodegen () 220 | defineLocalVar name ty isConst x = do 221 | syms <- gets symtab 222 | case Tab.insert name (SymbolVariable ty isConst, x) syms of 223 | Left _ -> 224 | throwLocatedError $ LocalRedefinition name 225 | Right syms' -> 226 | modify $ \s -> s { symtab = syms' } 227 | 228 | getVar :: I.ID -> SubCodegen (SymbolType, Operand) 229 | getVar var = do 230 | syms <- gets symtab 231 | case Tab.lookup var syms of 232 | Just x -> return x 233 | Nothing -> throwLocatedError $ SymbolNotInScope var 234 | 235 | instr :: Type -> Instruction -> SubCodegen Operand 236 | instr ty ins = do 237 | ref <- UnName <$> fresh 238 | namedInstr ref ty ins 239 | 240 | instr' :: T.Text -> Type -> Instruction -> SubCodegen Operand 241 | instr' n = namedInstr (mkName n) 242 | 243 | namedInstr :: Name -> Type -> Instruction -> SubCodegen Operand 244 | namedInstr ref ty ins = do 245 | blk <- current 246 | let i = stack blk 247 | modifyBlock (blk { stack = (ref := ins) : i}) 248 | return $ LocalReference ty ref 249 | 250 | voidInstr :: Instruction -> SubCodegen () 251 | voidInstr ins = do 252 | blk <- current 253 | let i = stack blk 254 | modifyBlock (blk { stack = Do ins : i}) 255 | 256 | alloca :: T.Text -> Type -> SubCodegen Operand 257 | alloca n ty = instr' n ty $ Alloca ty Nothing 0 [] 258 | 259 | store :: Operand -> Operand -> SubCodegen () 260 | store ptr val = 261 | voidInstr $ Store False ptr val Nothing 0 [] 262 | 263 | notInstr :: Operand -> SubCodegen Operand 264 | notInstr op = 265 | instr boolean $ AST.Xor constTrue op [] 266 | 267 | load :: Type -> Operand -> SubCodegen Operand 268 | load ty ptr = 269 | instr ty $ Load False ptr Nothing 0 [] 270 | 271 | callFunc :: Operand -> [Operand] -> [FA.FunctionAttribute] -> [Type] -> SubCodegen [Operand] 272 | callFunc fun args attrs rettys = do 273 | let retty = case rettys of 274 | [] -> VoidType 275 | [ty] -> ty 276 | tys -> StructureType False tys 277 | callInstr = Call Nothing CC.C [] (Right fun) (zip args (repeat [])) (Right <$> attrs) [] 278 | case retty of 279 | VoidType -> do 280 | voidInstr callInstr 281 | pure [] 282 | _ -> do 283 | op <- instr retty callInstr 284 | case rettys of 285 | [_] -> pure [op] 286 | tys -> 287 | forM (zip tys [0..]) $ \(ty, idx) -> 288 | instr ty $ ExtractValue op [idx] [] 289 | 290 | terminator :: Named Terminator -> SubCodegen () 291 | terminator trm = do 292 | blk <- current 293 | modifyBlock (blk { term = Just trm }) 294 | 295 | br :: Name -> SubCodegen () 296 | br val = terminator $ Do $ Br val [] 297 | 298 | cbr :: Operand -> Name -> Name -> SubCodegen () 299 | cbr cond tr fl = terminator $ Do $ CondBr cond tr fl [] 300 | 301 | ret :: Maybe Operand -> SubCodegen () 302 | ret val = terminator $ Do $ Ret val [] 303 | 304 | unreachable :: SubCodegen () 305 | unreachable = terminator $ Do $ Unreachable [] 306 | 307 | apiFunCall :: StandardCall -> [Operand] -> SubCodegen Operand 308 | apiFunCall c args = do 309 | useStdCall c 310 | callFunc op args attrs [retty] >>= \case 311 | [res] -> pure res 312 | _ -> throwLocatedError $ InternalError "Unexpected return value for builtin function" 313 | where 314 | retty = stdCallType c 315 | op = stdCallOp c 316 | attrs = stdCallAttrs c 317 | 318 | apiProcCall :: StandardCall -> [Operand] -> SubCodegen () 319 | apiProcCall c args = do 320 | useStdCall c 321 | callFunc op args attrs [] >>= \case 322 | [] -> pure () 323 | _ -> throwLocatedError $ InternalError "Unexpected return value for builtin procedure" 324 | where 325 | op = stdCallOp c 326 | attrs = stdCallAttrs c 327 | 328 | getLoopExitBlock :: SubCodegen (Maybe Name) 329 | getLoopExitBlock = gets loopExitBlock 330 | -------------------------------------------------------------------------------- /lib/IMP/Codegen/Utils.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module IMP.Codegen.Utils 4 | ( StandardCall(..) 5 | , SymbolTable 6 | , stdCallName 7 | , stdCallType 8 | , stdCallArgs 9 | , stdCallAttrs 10 | , stdCallOp 11 | , typeToLLVM 12 | , integer 13 | , constTrue 14 | , constFalse 15 | , constZero 16 | , boolean 17 | , checkIntegerBounds 18 | , mkName 19 | ) where 20 | 21 | import qualified IMP.AST as I 22 | import qualified IMP.SymbolTable as Tab 23 | import IMP.Types 24 | 25 | import LLVM.AST hiding (mkName) 26 | import qualified LLVM.AST.FunctionAttribute as FA 27 | import LLVM.AST.Constant as C 28 | import LLVM.AST.Type 29 | import LLVM.AST.AddrSpace 30 | import Data.Int 31 | import qualified Data.Text as T 32 | import qualified Data.Text.Encoding as TE 33 | import Data.ByteString.Short (toShort) 34 | 35 | data StandardCall = CallInputInteger 36 | | CallOutputInteger 37 | | CallOutputBoolean 38 | | CallOutputString 39 | | CallHalt 40 | | CallNewline 41 | | CallSAddWithOverflow 42 | | CallSSubWithOverflow 43 | | CallSMulWithOverflow 44 | | CallConstraintErrorEx 45 | | CallProgramErrorEx 46 | deriving (Show, Ord, Bounded, Eq) 47 | 48 | stdCallName :: StandardCall -> Name 49 | stdCallName CallInputInteger = "_IMP_input_integer" 50 | stdCallName CallOutputInteger = "_IMP_output_integer" 51 | stdCallName CallOutputBoolean = "_IMP_output_boolean" 52 | stdCallName CallOutputString = "_IMP_output_string" 53 | stdCallName CallHalt = "_IMP_halt" 54 | stdCallName CallNewline = "_IMP_newline" 55 | -- NOTE this call names will need to change if integer size is changed 56 | stdCallName CallSAddWithOverflow = "llvm.sadd.with.overflow.i32" 57 | stdCallName CallSSubWithOverflow = "llvm.ssub.with.overflow.i32" 58 | stdCallName CallSMulWithOverflow = "llvm.smul.with.overflow.i32" 59 | stdCallName CallConstraintErrorEx = "_IMP_constraint_error_ex" 60 | stdCallName CallProgramErrorEx = "_IMP_program_error_ex" 61 | 62 | stdCallType :: StandardCall -> Type 63 | stdCallType CallInputInteger = integer 64 | stdCallType CallSAddWithOverflow = integerAndBoolean 65 | stdCallType CallSSubWithOverflow = integerAndBoolean 66 | stdCallType CallSMulWithOverflow = integerAndBoolean 67 | stdCallType _ = VoidType 68 | 69 | stdCallArgs :: StandardCall -> [(Type, Name)] 70 | stdCallArgs CallOutputInteger = [(integer, "val")] 71 | stdCallArgs CallOutputBoolean = [(boolean, "val")] 72 | stdCallArgs CallOutputString = [(stringType, "s")] 73 | stdCallArgs CallSAddWithOverflow = [(integer, "a"), (integer, "b")] 74 | stdCallArgs CallSSubWithOverflow = [(integer, "a"), (integer, "b")] 75 | stdCallArgs CallSMulWithOverflow = [(integer, "a"), (integer, "b")] 76 | stdCallArgs CallConstraintErrorEx = [(stringType, "file_name"), (integer, "line_no")] 77 | stdCallArgs CallProgramErrorEx = [(stringType, "file_name"), (integer, "line_no")] 78 | stdCallArgs _ = [] 79 | 80 | stdCallAttrs :: StandardCall -> [FA.FunctionAttribute] 81 | stdCallAttrs CallHalt = [FA.NoReturn] 82 | stdCallAttrs CallConstraintErrorEx = [FA.NoReturn] 83 | stdCallAttrs CallProgramErrorEx = [FA.NoReturn] 84 | stdCallAttrs _ = [] 85 | 86 | stdCallOp :: StandardCall -> Operand 87 | stdCallOp c = ConstantOperand $ GlobalReference ty $ stdCallName c 88 | where 89 | retty = stdCallType c 90 | ty = ptr $ FunctionType retty (fst <$> stdCallArgs c) False 91 | 92 | type SymbolTableEntry = (SymbolType, Operand) 93 | 94 | type SymbolTable = Tab.SymbolTable I.ID SymbolTableEntry 95 | 96 | integer, boolean, stringType, integerAndBoolean :: Type 97 | integer = i32 98 | boolean = i1 99 | stringType = PointerType i8 $ AddrSpace 0 100 | integerAndBoolean = StructureType False [integer, boolean] 101 | 102 | checkIntegerBounds :: Integer -> Bool 103 | checkIntegerBounds n = n >= minInteger && n <= maxInteger 104 | where 105 | minInteger = fromIntegral (minBound :: Int32) 106 | maxInteger = fromIntegral (maxBound :: Int32) 107 | 108 | constFalse, constTrue :: Operand 109 | constFalse = ConstantOperand $ C.Int 1 0 110 | constTrue = ConstantOperand $ C.Int 1 1 111 | 112 | constZero :: Type -> Operand 113 | constZero ty = ConstantOperand $ C.Int (typeBits ty) 0 114 | 115 | typeToLLVM :: I.Type -> Type 116 | typeToLLVM I.IntegerType = integer 117 | typeToLLVM I.BooleanType = boolean 118 | typeToLLVM I.StringType = stringType 119 | 120 | mkName :: T.Text -> Name 121 | mkName = Name . toShort . TE.encodeUtf8 122 | -------------------------------------------------------------------------------- /lib/IMP/Emit.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE RecursiveDo #-} 3 | {-# LANGUAGE LambdaCase #-} 4 | 5 | module IMP.Emit ( CodegenOptions(..) 6 | , compileProgram 7 | ) where 8 | 9 | import qualified IMP.AST as I 10 | import IMP.AST (getID) 11 | import IMP.Codegen.GlobalCodegen 12 | import IMP.Codegen.SubCodegen 13 | import IMP.Codegen.Utils 14 | import IMP.Codegen.Error 15 | import IMP.SourceLoc 16 | import IMP.Types 17 | import qualified LLVM.AST as AST 18 | import LLVM.AST hiding (type', mkName) 19 | import qualified LLVM.AST.Constant as C 20 | import qualified LLVM.AST.IntegerPredicate as IP 21 | import Control.Monad 22 | import qualified Data.Text as T 23 | import Text.Megaparsec.Pos (unPos) -- FIXME: Don't use megeparsec here 24 | import Data.Maybe 25 | 26 | data ExpressionValue = ValueReference I.Type Operand 27 | -- ^ Reference to a non-constant variable 28 | | Value I.Type Operand 29 | -- ^ Raw value, unusable for out or in out parameters 30 | 31 | mkValue :: (I.Type, Operand) -> SubCodegen ExpressionValue 32 | mkValue (ty, op) = pure $ Value ty op 33 | 34 | dereference :: ExpressionValue -> SubCodegen (I.Type, Operand) 35 | dereference (ValueReference ty ptr) = do 36 | op <- load (typeToLLVM ty) ptr 37 | pure (ty, op) 38 | 39 | dereference (Value ty op) = pure (ty, op) 40 | 41 | compileProgram :: CodegenOptions -> I.Program -> Either (Located CodegenError) AST.Module 42 | compileProgram opts = execGlobalCodegen opts . codegenProgram 43 | 44 | codegenProgram :: I.Program -> GlobalCodegen () 45 | codegenProgram (I.Program vars subs) = do 46 | mapM_ (withLoc codegenVars) vars 47 | mapM_ (withLoc codegenSubDecl) subs 48 | mapM_ (withLoc codegenSub) subs 49 | 50 | -- | Emit definitions of global variables 51 | codegenVars :: I.VarDec -> GlobalCodegen () 52 | codegenVars (I.VarDec names t) = mapM_ (withLoc $ defineVar (unLoc t) NotConstant) names 53 | 54 | toSig :: [Located I.ParamList] -> [(I.Type, I.Mode, Located I.ID)] 55 | toSig = concatMap (toSig' . unLoc) 56 | where 57 | toSig' (I.ParamList ids mode ty) = map (toSig'' (unLoc ty) mode) ids 58 | toSig'' t mode name = (t, mode, name) 59 | 60 | paramTypes :: [Located I.ParamList] -> [Argument] 61 | paramTypes = fmap (\(ty, mode, _) -> (ty, mode)) . toSig 62 | 63 | codegenSubDecl :: I.Subroutine -> GlobalCodegen () 64 | codegenSubDecl (I.Procedure name params _ _) = 65 | withLoc (`declareProc` ts) name 66 | where 67 | ts = paramTypes params 68 | 69 | codegenSubDecl (I.Function name params retty _ _) = 70 | withLoc (\n -> declareFun n (unLoc retty) ts) name 71 | where 72 | ts = paramTypes params 73 | 74 | codegenSub' :: Located I.ID -> Maybe I.Type -> [Located I.ParamList] -> [Located I.VarDec] -> I.Statements -> GlobalCodegen () 75 | codegenSub' name retty params vars body = do 76 | blocks <- execSubCodegen cg 77 | defineSub (unLoc name) retty args blocks 78 | where 79 | args = toSig params 80 | 81 | cg = mdo 82 | _ <- block "entry" 83 | retval <- forM retty $ \t -> do 84 | op <- alloca ".return" $ typeToLLVM t 85 | return (t, op) 86 | 87 | setExitBlock retval exit 88 | 89 | argLocs <- forM args $ \(ty, mode, a) -> do 90 | let t = typeToLLVM ty 91 | arg = LocalReference t $ mkName $ getID $ unLoc a 92 | varPtr <- alloca (getID (unLoc a) <> ".addr") t 93 | case mode of 94 | I.ModeIn -> do 95 | store varPtr arg 96 | withLoc (\n -> defineLocalVar n ty IsConstant varPtr) a 97 | pure Nothing 98 | I.ModeOut -> do 99 | -- Mode out variables are not initialized 100 | withLoc (\n -> defineLocalVar n ty NotConstant varPtr) a 101 | pure $ Just (ty, varPtr) 102 | I.ModeInOut -> do 103 | store varPtr arg 104 | withLoc (\n -> defineLocalVar n ty NotConstant varPtr) a 105 | pure $ Just (ty, varPtr) 106 | codegenLocals vars 107 | mapM_ (withLoc codegenStatement) body 108 | case retty of 109 | Just _ -> do 110 | raiseProgramError 111 | unreachable 112 | Nothing -> 113 | br exit 114 | 115 | exit <- block "exit" 116 | if unLoc name == I.ID "main" 117 | then do 118 | apiProcCall CallHalt [] 119 | unreachable 120 | else 121 | case catMaybes (retval : argLocs) of 122 | [] -> ret Nothing 123 | [(ty, ptr)] -> do 124 | op <- load (typeToLLVM ty) ptr 125 | ret $ Just op 126 | refs -> do 127 | let actualReturnType = StructureType False $ map (typeToLLVM . fst) refs 128 | storeResult aggr ((ty, op), idx) = do 129 | tmp <- load (typeToLLVM ty) op 130 | instr actualReturnType $ InsertValue aggr tmp [idx] [] 131 | 132 | retAggr <- foldM storeResult (ConstantOperand $ C.Undef actualReturnType) (zip refs [0..]) 133 | ret $ Just retAggr 134 | 135 | codegenSub :: I.Subroutine -> GlobalCodegen () 136 | codegenSub (I.Procedure name params vars body) = do 137 | when ((unLoc name == I.ID "main") && not (null params)) $ 138 | throwLocatedError MainHasArguments 139 | codegenSub' name Nothing params vars body 140 | 141 | codegenSub (I.Function name params retty vars body) = do 142 | when (unLoc name == I.ID "main") $ throwLocatedError MainIsAFunction 143 | codegenSub' name (Just $ unLoc retty) params vars body 144 | 145 | codegenLocals :: [Located I.VarDec] -> SubCodegen () 146 | codegenLocals = mapM_ $ withLoc codegenLocals' 147 | 148 | codegenLocals' :: I.VarDec -> SubCodegen () 149 | codegenLocals' (I.VarDec names ty) = mapM_ (withLoc $ \n -> codegenLocal n $ unLoc ty) names 150 | 151 | -- FIXME Currently all local variables are not constant 152 | codegenLocal :: I.ID -> I.Type -> SubCodegen () 153 | codegenLocal name ty = do 154 | var <- alloca (getID name) $ typeToLLVM ty 155 | defineLocalVar name ty NotConstant var 156 | 157 | typeCheck :: I.Type -> I.Type -> SubCodegen () 158 | typeCheck lt rt = 159 | when (lt /= rt) $ throwLocatedError $ TypeMismatch lt rt 160 | 161 | maybeGenBlock :: T.Text -> Name -> I.Statements -> SubCodegen Name 162 | maybeGenBlock _ contName [] = return contName 163 | 164 | maybeGenBlock newTemlate contName stmts = mdo 165 | e <- entry 166 | newName <- block newTemlate 167 | mapM_ (withLoc codegenStatement) stmts 168 | br contName 169 | 170 | setBlock e 171 | return newName 172 | 173 | -- | Uncoditionally transfer execution to another block 174 | -- 175 | -- Sets a new active block. Operations emitted into this 176 | -- block will be discarded 177 | goto :: Name -> SubCodegen () 178 | goto bname = do 179 | br bname 180 | _ <- block "discard" 181 | return () 182 | 183 | -- | Returns operands for input arguments and types and references for output arguments 184 | codegenArgs :: [Argument] -> [Located I.Expression] -> SubCodegen ([Operand], [(I.Type, Operand)]) 185 | codegenArgs args exps = do 186 | when (length args /= length exps) $ throwLocatedError InvalidNumberOfArguments 187 | r <- zipWithM typeCheckArg args exps 188 | pure (mapMaybe fst r, mapMaybe snd r) 189 | 190 | typeCheckArg :: Argument -> Located I.Expression -> SubCodegen (Maybe Operand, Maybe (I.Type, Operand)) 191 | typeCheckArg (argTy, mode) = 192 | withLoc (codegenExpression >=> check mode) 193 | where 194 | check I.ModeIn val = do 195 | (expTy, op) <- dereference val 196 | typeCheck argTy expTy 197 | pure (Just op, Nothing) 198 | 199 | check I.ModeOut val = 200 | case val of 201 | (ValueReference expTy ptr) -> do 202 | typeCheck argTy expTy 203 | pure (Nothing, Just (expTy, ptr)) 204 | _ -> throwLocatedError $ ConstantExpressionAsParameter mode 205 | 206 | check I.ModeInOut val = 207 | case val of 208 | (ValueReference expTy ptr) -> do 209 | typeCheck argTy expTy 210 | (_, op) <- dereference val 211 | pure (Just op, Just (expTy, ptr)) 212 | _ -> throwLocatedError $ ConstantExpressionAsParameter mode 213 | 214 | codegenStatement :: I.Statement -> SubCodegen () 215 | 216 | codegenStatement (I.IfStatement condPart elseStmts) = mdo 217 | mapM_ (emitCondPart exitB) condPart 218 | mapM_ (withLoc codegenStatement) elseStmts 219 | br exitB 220 | 221 | exitB <- block "if.exit" 222 | return () 223 | where 224 | check = checkBoolean NonBooleanIfCondition 225 | 226 | emitCondPart exitB (cond, stmts) = mdo 227 | op <- withLoc (codegenExpression >=> check) cond 228 | 229 | ifB <- maybeGenBlock "then" exitB stmts 230 | cbr op ifB elseB 231 | 232 | elseB <- block "else" 233 | return () 234 | 235 | codegenStatement (I.WhileStatement cond body) = mdo 236 | br whileCondB 237 | 238 | whileCondB <- block "while.cond" 239 | op <- withLoc (codegenExpression >=> check) cond 240 | whileLoopB <- withLoopExit whileExitB $ maybeGenBlock "while.loop" whileCondB body 241 | cbr op whileLoopB whileExitB 242 | 243 | whileExitB <- block "while.exit" 244 | return () 245 | where 246 | check = checkBoolean NonBooleanWhileCondition 247 | 248 | codegenStatement (I.AssignStatement name exp) = do 249 | (varSymType, varPtr) <- withLoc getVar name 250 | case varSymType of 251 | SymbolVariable varType NotConstant -> do 252 | (expType, op) <- dereference =<< withLoc codegenExpression exp 253 | typeCheck varType expType 254 | store varPtr op 255 | SymbolVariable _ IsConstant -> throwLocatedError AssignmentToConstant 256 | _ -> throwLocatedError $ NotAVariable $ unLoc name 257 | 258 | codegenStatement (I.CallStatement name exps) = do 259 | (ty, proc) <- withLoc getVar name 260 | case ty of 261 | SymbolVariable _ _ -> throwLocatedError AttemptToCallAVariable 262 | SymbolFunction _ _ -> throwLocatedError AttemptToCallAFunctionAsProcedure 263 | SymbolProcedure args -> do 264 | (inOps, outRefs) <- codegenArgs args exps 265 | ops <- callFunc proc inOps [] (map (typeToLLVM . fst) outRefs) 266 | forM_ (zip outRefs ops) $ \((_, ptr), op) -> 267 | store ptr op 268 | 269 | codegenStatement (I.InputStatement name) = do 270 | (ty, ptr) <- withLoc getVar name 271 | case ty of 272 | SymbolVariable t NotConstant -> do 273 | op <- case t of 274 | I.IntegerType -> apiFunCall CallInputInteger [] 275 | _ -> throwLocatedError InputError 276 | store ptr op 277 | SymbolVariable _ IsConstant -> throwLocatedError AssignmentToConstant 278 | _ -> throwLocatedError InputError 279 | 280 | codegenStatement (I.OutputStatement exp) = do 281 | (ty, op) <- dereference =<< withLoc codegenExpression exp 282 | let api = case ty of 283 | I.IntegerType -> CallOutputInteger 284 | I.BooleanType -> CallOutputBoolean 285 | I.StringType -> CallOutputString 286 | apiProcCall api [op] 287 | 288 | codegenStatement I.NullStatement = return () 289 | 290 | codegenStatement I.BreakStatement = 291 | getLoopExitBlock >>= maybe (throwLocatedError BreakOutsideOfLoop) goto 292 | 293 | codegenStatement I.ReturnStatement = 294 | exit >>= \case 295 | (_, Just _) -> throwLocatedError VoidReturnInFunction 296 | (bname, Nothing) -> goto bname 297 | 298 | codegenStatement (I.ReturnValStatement exp) = 299 | exit >>= \case 300 | (bname, Just (retty, ptr)) -> do 301 | op <- withLoc (codegenExpression >=> check retty) exp 302 | store ptr op 303 | goto bname 304 | (_, Nothing) -> throwLocatedError NonVoidReturnInProcedure 305 | where 306 | check t val = do 307 | (t', op) <- dereference val 308 | typeCheck t t' 309 | return op 310 | 311 | codegenStatement I.HaltStatement = apiProcCall CallHalt [] 312 | codegenStatement I.NewlineStatement = apiProcCall CallNewline [] 313 | 314 | codegenExpression :: I.Expression -> SubCodegen ExpressionValue 315 | codegenExpression (I.UnOpExp unaryOp expr) = do 316 | (ty, fOp) <- dereference =<< withLoc codegenExpression expr 317 | case (unaryOp, ty) of 318 | (I.OpNot, I.BooleanType) -> do 319 | op <- notInstr fOp 320 | mkValue (ty, op) 321 | (I.OpNeg, I.IntegerType) -> 322 | genArithCall ty CallSSubWithOverflow (constZero $ typeToLLVM ty) fOp 323 | _ -> throwLocatedError $ UnaryOpTypeMismatch unaryOp ty 324 | 325 | codegenExpression (I.BinOpExp leftExp binaryOp rightExp) = do 326 | (leftType, leftOp) <- dereference =<< withLoc codegenExpression leftExp 327 | (rightType, rightOp) <- dereference =<< withLoc codegenExpression rightExp 328 | typeCheck leftType rightType 329 | 330 | let fn = case (binaryOp, leftType) of 331 | (I.OpEQ, I.BooleanType) -> icmp IP.EQ 332 | (I.OpEQ, I.IntegerType) -> icmp IP.EQ 333 | (I.OpLT, I.IntegerType) -> icmp IP.SLT 334 | (I.OpLE, I.IntegerType) -> icmp IP.SLE 335 | (I.OpGT, I.IntegerType) -> icmp IP.SGT 336 | (I.OpGE, I.IntegerType) -> icmp IP.SLE 337 | (I.OpNE, I.BooleanType) -> icmp IP.NE 338 | (I.OpNE, I.IntegerType) -> icmp IP.NE 339 | (I.OpAdd, I.IntegerType) -> genArithCall leftType CallSAddWithOverflow 340 | (I.OpSub, I.IntegerType) -> genArithCall leftType CallSSubWithOverflow 341 | (I.OpOr, I.BooleanType) -> or 342 | (I.OpMul, I.IntegerType) -> genArithCall leftType CallSMulWithOverflow 343 | (I.OpDiv, I.IntegerType) -> genDivOp leftType sdiv 344 | (I.OpMod, I.IntegerType) -> genDivOp leftType srem 345 | (I.OpAnd, I.BooleanType) -> and 346 | _ -> \_ _ -> throwLocatedError $ BinaryOpTypeMismatch binaryOp leftType 347 | 348 | fn leftOp rightOp 349 | where 350 | wrap ty f op0 op1 = do 351 | op <- instr (typeToLLVM ty) $ f op0 op1 [] 352 | mkValue (ty, op) 353 | 354 | icmp pred = wrap I.BooleanType (AST.ICmp pred) 355 | or = wrap I.BooleanType AST.Or 356 | sdiv = wrap I.IntegerType $ AST.SDiv False 357 | srem = wrap I.IntegerType AST.SRem 358 | and = wrap I.BooleanType AST.And 359 | 360 | codegenExpression (I.NumberExpression c@(I.Number num)) = 361 | if checkIntegerBounds num 362 | then mkValue (I.IntegerType, op) 363 | else throwLocatedError $ IntegerLiteralOutOfTypeRange c 364 | where 365 | op = ConstantOperand $ C.Int (typeBits integer) num 366 | 367 | codegenExpression (I.BoolExpression val) = 368 | mkValue (I.BooleanType, op) 369 | where 370 | op = if val then constTrue else constFalse 371 | 372 | codegenExpression (I.IdExpression name) = do 373 | (ty, ptr) <- withLoc getVar name 374 | case ty of 375 | SymbolVariable t NotConstant -> 376 | pure $ ValueReference t ptr 377 | SymbolVariable t IsConstant -> do 378 | op <- load (typeToLLVM t) ptr 379 | mkValue (t, op) 380 | _ -> throwLocatedError AttemptToReadSubroutine 381 | 382 | codegenExpression (I.CallExpression name exps) = do 383 | (ty, fun) <- withLoc getVar name 384 | case ty of 385 | SymbolVariable _ _ -> throwLocatedError AttemptToCallAVariable 386 | SymbolProcedure _ -> throwLocatedError AttemptToCallAProcedureAsFunction 387 | SymbolFunction retty args -> do 388 | (inOps, outRefs) <- codegenArgs args exps 389 | callFunc fun inOps [] (typeToLLVM retty : map (typeToLLVM . fst) outRefs) >>= \case 390 | (op : ops) -> do 391 | forM_ (zip outRefs ops) $ \((_, ptr), op) -> 392 | store ptr op 393 | mkValue (retty, op) 394 | _ -> throwLocatedError $ InternalError "Function missing return value" 395 | 396 | codegenExpression (I.StringLiteralExpression str) = do 397 | op <- withLoc emitString str 398 | mkValue (I.StringType, op) 399 | 400 | raiseError :: StandardCall -> SubCodegen () 401 | raiseError call = do 402 | loc <- currentLoc 403 | fileNameOp <- emitString (T.pack $ sourceName loc) 404 | let lineNumOp = ConstantOperand $ C.Int (typeBits integer) $ fromIntegral $ unPos $ sourceLine loc 405 | apiProcCall call [fileNameOp, lineNumOp] 406 | 407 | raiseConstraintError :: SubCodegen () 408 | raiseConstraintError = raiseError CallConstraintErrorEx 409 | 410 | raiseProgramError :: SubCodegen () 411 | raiseProgramError = raiseError CallProgramErrorEx 412 | 413 | -- | Generate a division operation with division by zero check. 414 | -- 415 | -- TODO Make check optional 416 | -- TODO Adjust weights of branches 417 | genDivOp :: I.Type -> (Operand -> Operand -> SubCodegen ExpressionValue) 418 | -> Operand -> Operand -> SubCodegen ExpressionValue 419 | genDivOp ty fn leftOp rightOp = mdo 420 | condOp <- instr boolean $ AST.ICmp IP.EQ rightOp (constZero $ typeToLLVM ty) [] 421 | cbr condOp exB divB 422 | 423 | exB <- block "div.ex" 424 | raiseConstraintError 425 | unreachable 426 | 427 | divB <- block "div" 428 | fn leftOp rightOp 429 | 430 | -- | Generate integer arithmetic operations with overflow check. 431 | -- 432 | -- TODO Make check optional 433 | -- TODO Adjust weights of branches 434 | genArithCall :: I.Type -> StandardCall -> Operand -> Operand -> SubCodegen ExpressionValue 435 | genArithCall ty c leftOp rightOp = mdo 436 | op <- apiFunCall c [leftOp, rightOp] 437 | obit <- instr boolean $ ExtractValue op [1] [] 438 | cbr obit exB arithB 439 | 440 | exB <- block "arith.ex" 441 | raiseConstraintError 442 | unreachable 443 | 444 | arithB <- block "arith" 445 | res <- instr (typeToLLVM ty) $ ExtractValue op [0] [] 446 | mkValue (ty, res) 447 | 448 | checkBoolean :: CodegenError -> ExpressionValue -> SubCodegen Operand 449 | checkBoolean err val = do 450 | (ty, op) <- dereference val 451 | case ty of 452 | I.BooleanType -> pure op 453 | _ -> throwLocatedError err 454 | -------------------------------------------------------------------------------- /lib/IMP/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module IMP.Parser 4 | ( Parser 5 | , parser 6 | -- Other exports for testing 7 | , stringLiteral 8 | , identifier 9 | ) where 10 | 11 | import IMP.AST 12 | import IMP.SourceLoc 13 | import IMP.Parser.Error 14 | import Control.Applicative hiding (many) 15 | import Control.Monad 16 | import Data.Char 17 | import Data.Foldable 18 | import Text.Megaparsec 19 | import Text.Megaparsec.Char 20 | import qualified Text.Megaparsec.Char.Lexer as L 21 | import qualified Data.Text as T 22 | import Data.Functor.Syntax 23 | import Data.List.NonEmpty (NonEmpty(..)) 24 | 25 | type Parser = Parsec CustomError T.Text 26 | 27 | comment :: Parser () 28 | comment = L.skipLineComment "--" 29 | 30 | sc :: Parser () 31 | sc = L.space space1 comment empty 32 | 33 | located :: Parser a -> Parser (Located a) 34 | located m = Located <$> getSourcePos <*> m 35 | 36 | lexeme' :: Parser a -> Parser a 37 | lexeme' = L.lexeme sc 38 | 39 | lexeme :: Parser a -> Parser (Located a) 40 | lexeme = located . lexeme' 41 | 42 | symbol' :: T.Text -> Parser () 43 | symbol' = void . L.symbol sc 44 | 45 | symbol :: T.Text -> Parser (Located ()) 46 | symbol = located . symbol' 47 | 48 | parens :: Parser a -> Parser a 49 | parens = between (symbol' "(") (symbol' ")") 50 | 51 | decimal :: Integral a => Parser a 52 | decimal = lexeme' L.decimal 53 | 54 | stringLiteral :: Parser (Located T.Text) 55 | stringLiteral = lexeme stringLiteral' 56 | where 57 | stringLiteral' = T.pack <$> (char '"' *> many stringElement <* char '"') 58 | stringElement = try (char '"' *> char '"') 59 | <|> satisfy (\c -> isPrint c && c /= '"') 60 | 61 | -- FIXME merge with identifier 62 | rword :: T.Text -> Parser () 63 | rword w = void $ lexeme' (string' w *> notFollowedBy alphaNumChar) 64 | 65 | rwords :: [T.Text] -> Parser () 66 | rwords = traverse_ rword 67 | 68 | rws :: [T.Text] 69 | rws = [ "and", "begin", "boolean", "break", "call", "else", "elsif", "end" 70 | , "false", "function", "halt", "if", "in", "input", "integer" 71 | , "is", "loop", "newline", "not", "null", "or", "out", "output" 72 | , "procedure", "return", "then", "true", "var", "while" 73 | ] 74 | 75 | isIdentifierStart :: Char -> Bool 76 | isIdentifierStart = (`elem` cats) . generalCategory 77 | where cats = [ UppercaseLetter 78 | , LowercaseLetter 79 | , TitlecaseLetter 80 | , ModifierLetter 81 | , OtherLetter 82 | , LetterNumber 83 | ] 84 | 85 | isIdentifierExtend :: Char -> Bool 86 | isIdentifierExtend = (`elem` cats) . generalCategory 87 | where cats = [ NonSpacingMark 88 | , SpacingCombiningMark 89 | , DecimalNumber 90 | , ConnectorPunctuation 91 | ] 92 | 93 | isIdentifierCont :: Char -> Bool 94 | isIdentifierCont c = isIdentifierStart c || isIdentifierExtend c 95 | 96 | data IdentCheckState = IdentCheckOther 97 | | IdentCheckPunctConn 98 | | IdentCheckTwoPunctConn 99 | 100 | instance Semigroup IdentCheckState where 101 | IdentCheckOther <> st = st 102 | IdentCheckPunctConn <> IdentCheckOther = IdentCheckOther 103 | IdentCheckPunctConn <> _ = IdentCheckTwoPunctConn 104 | IdentCheckTwoPunctConn <> _ = IdentCheckTwoPunctConn 105 | 106 | instance Monoid IdentCheckState where 107 | mempty = IdentCheckOther 108 | 109 | idName :: Parser (Located T.Text) 110 | idName = do 111 | -- FIXME look ahead to keep error position 112 | name <- lookAhead p 113 | check name 114 | lexeme p 115 | where 116 | identifierStart = satisfy isIdentifierStart "identifier start" 117 | identifierCont = takeWhileP (Just "identifier continuation") isIdentifierCont 118 | p = T.cons <$> identifierStart <*> identifierCont 119 | 120 | check :: T.Text -> Parser () 121 | check name = do 122 | when (T.toCaseFold name `elem` rws) $ 123 | customFailure $ RWordAsIdentifier name 124 | case T.foldl ((. checkChar) . (<>)) mempty name of 125 | IdentCheckOther -> pure () 126 | IdentCheckPunctConn -> customFailure $ IdentifierEndsWithPunctConn name 127 | IdentCheckTwoPunctConn -> customFailure $ IdentifierContainsTwoPunctConn name 128 | 129 | checkChar c = 130 | case generalCategory c of 131 | ConnectorPunctuation -> IdentCheckPunctConn 132 | _ -> IdentCheckOther 133 | 134 | comma, colon, semicolon, equals :: Parser () 135 | comma = symbol' "," 136 | colon = symbol' ":" 137 | semicolon = symbol' ";" 138 | equals = symbol' "=" 139 | 140 | identifier :: Parser (Located ID) 141 | identifier = ID . T.toCaseFold <$$> idName "identifier" 142 | 143 | typeName :: Parser (Located Type) 144 | typeName = located (IntegerType <$ rword "integer") 145 | <|> located (BooleanType <$ rword "boolean") 146 | 147 | bool :: Parser Bool 148 | bool = False <$ rword "false" 149 | <|> True <$ rword "true" 150 | 151 | number :: Parser Number 152 | number = Number <$> decimal 153 | 154 | relOp :: Parser BinaryOp 155 | relOp = OpEQ <$ symbol "==" 156 | <|> OpLE <$ try (symbol "<=") 157 | <|> OpLT <$ symbol "<" 158 | <|> OpGE <$ try (symbol ">=") 159 | <|> OpGT <$ symbol ">" 160 | <|> OpNE <$ symbol "#" 161 | 162 | mulOp :: Parser BinaryOp 163 | mulOp = OpMul <$ symbol "*" 164 | <|> OpDiv <$ symbol "/" 165 | <|> OpMod <$ symbol "%" 166 | <|> OpAnd <$ rword "and" 167 | 168 | addOp :: Parser BinaryOp 169 | addOp = OpAdd <$ symbol "+" 170 | <|> OpSub <$ symbol "-" 171 | <|> OpOr <$ rword "or" 172 | 173 | unaryOp :: Parser UnaryOp 174 | unaryOp = OpNot <$ rword "not" 175 | <|> OpNeg <$ symbol "-" 176 | 177 | parser :: Parser Program 178 | parser = between sc eof program 179 | 180 | program :: Parser Program 181 | program = Program <$> varDecs <*> subroutines 182 | 183 | varDec :: Parser VarDec 184 | varDec = VarDec <$> (rword "var" *> (identifier `sepBy` comma) <* colon) <*> typeName 185 | 186 | varDecs :: Parser [Located VarDec] 187 | varDecs = located varDec `endBy` semicolon 188 | 189 | subroutine :: Parser Subroutine 190 | subroutine = procedure <|> function 191 | 192 | subroutines :: Parser [Located Subroutine] 193 | subroutines = located subroutine `endBy` semicolon 194 | 195 | checkSubName :: Located ID -> Parser () 196 | checkSubName name = do 197 | endName <- unLoc <$> lookAhead identifier 198 | when (unLoc name /= endName) $ 199 | customFailure $ EndMismatch name endName 200 | void identifier 201 | 202 | procedure :: Parser Subroutine 203 | procedure = do 204 | rword "procedure" 205 | name <- identifier 206 | params <- parens paramLists 207 | rword "is" 208 | vars <- varDecs 209 | body <- procBody 210 | checkSubName name 211 | return $ Procedure name params vars body 212 | 213 | function :: Parser Subroutine 214 | function = do 215 | rword "function" 216 | name <- identifier 217 | params <- parens paramLists 218 | rword "return" 219 | returnType <- typeName 220 | rword "is" 221 | vars <- varDecs 222 | body <- procBody 223 | checkSubName name 224 | return $ Function name params returnType vars body 225 | 226 | procBody :: Parser Statements 227 | procBody = between (rword "begin") (rword "end") statements 228 | 229 | paramList :: Parser ParamList 230 | paramList = ParamList <$> (identifier `sepBy1` comma) <* colon <*> mode <*> typeName 231 | 232 | mode :: Parser Mode 233 | mode = ModeInOut <$ try (rwords ["in", "out"]) 234 | <|> ModeIn <$ try (rword "in") 235 | <|> ModeOut <$ try (rword "out") 236 | <|> pure ModeIn 237 | 238 | paramLists :: Parser [Located ParamList] 239 | paramLists = located paramList `sepBy` semicolon 240 | 241 | statement :: Parser Statement 242 | statement = ifStatement 243 | <|> WhileStatement <$> (rword "while" *> located expression <* rword "loop") <*> statements <* rwords ["end", "loop"] 244 | <|> CallStatement <$> (rword "call" *> identifier) <*> parens expressions 245 | <|> InputStatement <$> (rword "input" *> parens identifier) 246 | <|> OutputStatement <$> (rword "output" *> parens (located expression)) 247 | <|> NullStatement <$ rword "null" 248 | <|> BreakStatement <$ rword "break" 249 | <|> returnStatement 250 | <|> HaltStatement <$ rword "halt" 251 | <|> NewlineStatement <$ rword "newline" 252 | <|> AssignStatement <$> identifier <* equals <*> located expression 253 | 254 | ifStatement :: Parser Statement 255 | ifStatement = do 256 | rword "if" 257 | cond <- located expression 258 | rword "then" 259 | stmts <- statements 260 | elsifs <- many elsifPart 261 | elseStmts <- elsePart 262 | rwords ["end", "if"] 263 | return $ IfStatement ((cond, stmts) :| elsifs) elseStmts 264 | 265 | elsifPart :: Parser ConditionWithStatements 266 | elsifPart = (,) <$> (rword "elsif" *> located expression <* rword "then") <*> statements 267 | 268 | elsePart :: Parser Statements 269 | elsePart = rword "else" *> statements 270 | <|> pure [] 271 | 272 | returnStatement :: Parser Statement 273 | returnStatement = rword "return" *> (ReturnValStatement <$> located expression <|> pure ReturnStatement) 274 | 275 | statements :: Parser Statements 276 | statements = located statement `endBy` semicolon 277 | 278 | expression :: Parser Expression 279 | expression = try (BinOpExp <$> located simpleExpression <*> relOp <*> located simpleExpression) 280 | <|> simpleExpression 281 | 282 | expressions :: Parser [Located Expression] 283 | expressions = located expression `sepBy` comma 284 | 285 | simpleExpression :: Parser Expression 286 | simpleExpression = try (BinOpExp <$> located term <*> addOp <*> located term) 287 | <|> term 288 | 289 | term :: Parser Expression 290 | term = try (BinOpExp <$> located factor <*> mulOp <*> located factor) 291 | <|> factor 292 | 293 | factor :: Parser Expression 294 | factor = UnOpExp <$> unaryOp <*> located factor 295 | <|> NumberExpression <$> number 296 | <|> StringLiteralExpression <$> stringLiteral 297 | <|> BoolExpression <$> bool 298 | <|> parens expression 299 | <|> try (CallExpression <$> identifier <*> parens expressions) 300 | <|> IdExpression <$> identifier 301 | -------------------------------------------------------------------------------- /lib/IMP/Parser/Error.hs: -------------------------------------------------------------------------------- 1 | module IMP.Parser.Error 2 | ( CustomError(..) 3 | ) where 4 | 5 | import IMP.AST 6 | import IMP.SourceLoc 7 | import Text.Megaparsec.Error 8 | import Text.Printf 9 | import qualified Data.Text as T 10 | 11 | data CustomError = EndMismatch (Located ID) ID 12 | | RWordAsIdentifier T.Text 13 | | IdentifierEndsWithPunctConn T.Text 14 | | IdentifierContainsTwoPunctConn T.Text 15 | deriving (Eq, Ord) 16 | 17 | instance ShowErrorComponent CustomError where 18 | -- TODO show location of subroutine head 19 | showErrorComponent (EndMismatch name endName) = 20 | printf "\"%s\" expected but \"%s\" found." (getID $ unLoc name) 21 | (getID endName) 22 | showErrorComponent (RWordAsIdentifier name) = 23 | printf "Reserved word \"%s\" cannot be used as identifier." name 24 | 25 | showErrorComponent (IdentifierEndsWithPunctConn name) = 26 | printf "Identifier \"%s\" ends with a character in category punctuation_connector." name 27 | 28 | showErrorComponent (IdentifierContainsTwoPunctConn name) = 29 | printf "Identifier \"%s\" contains two consecutive characters in category punctuation_connector." name 30 | -------------------------------------------------------------------------------- /lib/IMP/SourceLoc.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveFunctor #-} 2 | -- | Utilities for working with source locations. 3 | module IMP.SourceLoc ( Located(..) 4 | , WithLoc(..) 5 | , SourcePos(..) 6 | , initialPos 7 | , withLoc 8 | ) where 9 | 10 | import Text.Megaparsec.Pos 11 | 12 | -- | Type @'Located' a@ represents values of type @a@ with source location 13 | -- attached to them. 14 | data Located a = Located 15 | { getLoc :: SourcePos 16 | , unLoc :: a 17 | } deriving (Eq, Ord, Show, Functor) 18 | 19 | -- | @'WithLoc' m@ represents computations with associated source location. 20 | -- 21 | -- Instances of 'WithLoc' should satisfy the following laws: 22 | -- 23 | -- * @'withNewLoc' p 'currentLoc' == 'pure' p@ 24 | -- 25 | class Applicative f => WithLoc f where 26 | -- | The expression (@'withNewLoc' p f@) returns computation @f@ with location 27 | -- @p@ set as context. 28 | withNewLoc :: SourcePos -> f a -> f a 29 | -- | Pure value containing current location. 30 | currentLoc :: f SourcePos 31 | 32 | -- | The expression (@'withLoc' f x@) passes the value from @x@ to the 33 | -- function @f@ and returns computation with location from @x@. 34 | withLoc :: WithLoc f => (t -> f a) -> Located t -> f a 35 | withLoc f (Located p a) = withNewLoc p (f a) 36 | -------------------------------------------------------------------------------- /lib/IMP/SymbolTable.hs: -------------------------------------------------------------------------------- 1 | module IMP.SymbolTable 2 | ( SymbolTable 3 | , empty 4 | , newScope 5 | , insert 6 | , lookup 7 | ) where 8 | 9 | import qualified Data.Map.Strict as Map 10 | import Data.List.NonEmpty (NonEmpty(..), (<|)) 11 | import Data.Monoid 12 | import Prelude hiding (lookup) 13 | 14 | newtype SymbolTable k v = SymbolTable (NonEmpty (Map.Map k v)) deriving Show 15 | 16 | -- | Creates an empty symbol table. 17 | empty :: SymbolTable k v 18 | empty = SymbolTable (Map.empty :| []) 19 | 20 | -- | Starts new scope in symbol table. 21 | newScope :: SymbolTable k v -> SymbolTable k v 22 | newScope (SymbolTable l) = SymbolTable $ Map.empty <| l 23 | 24 | -- | Insert new symbol into symbol table. 25 | -- 26 | -- The expression (@'insert' k v symtab@) inserts a new value @v@ with key @k@ 27 | -- into the active scope if it is not already present there. 28 | -- 29 | -- In case of success returns updated symbol table as @'Right'@. In case 30 | -- of error an old value of the symbol is returned as @'Left'@. 31 | insert :: Ord k => k -> v -> SymbolTable k v -> Either v (SymbolTable k v) 32 | insert k v (SymbolTable (s :| ss)) = 33 | case Map.lookup k s of 34 | Nothing -> Right $ SymbolTable $ Map.insert k v s :| ss 35 | Just v -> Left v 36 | 37 | -- | Lookup value in symbol table. 38 | -- 39 | -- The expression (@'lookup' k symtab'@) looks up a value with key 40 | -- @k@ in @symtab@ and returns it as @'Just'@ if found. Otherwise 41 | -- returns @'Nothing'@. 42 | lookup :: Ord k => k -> SymbolTable k v -> Maybe v 43 | lookup k (SymbolTable ss) = getFirst $ foldMap (First . Map.lookup k) ss 44 | -------------------------------------------------------------------------------- /lib/IMP/Types.hs: -------------------------------------------------------------------------------- 1 | module IMP.Types where 2 | 3 | import IMP.AST 4 | 5 | data IsConstant = NotConstant 6 | | IsConstant 7 | deriving Show 8 | 9 | type Argument = (Type, Mode); 10 | 11 | data SymbolType = SymbolVariable Type IsConstant 12 | | SymbolProcedure [Argument] 13 | | SymbolFunction Type [Argument] 14 | deriving Show 15 | 16 | data ArgumentHandling = ArgumentHandling 17 | { argInitialized :: !Bool 18 | , argReturned :: !Bool 19 | , argIsConstant :: !IsConstant 20 | } 21 | 22 | argumentHandling :: Argument -> ArgumentHandling 23 | argumentHandling (_, ModeIn) = ArgumentHandling True False IsConstant 24 | argumentHandling (_, ModeOut) = ArgumentHandling False True NotConstant 25 | argumentHandling (_, ModeInOut) = ArgumentHandling True True NotConstant 26 | -------------------------------------------------------------------------------- /lib/Test/Tasty/IMP.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Test.Tasty.IMP 4 | ( AcceptTests(..) 5 | , TestsDirectory(..) 6 | , testCase 7 | , testSubDir 8 | ) where 9 | 10 | import Data.Typeable (Typeable) 11 | import Data.Proxy 12 | import Test.Tasty 13 | import Test.Tasty.Providers 14 | import Test.Tasty.Options 15 | import System.Exit 16 | import System.IO.Temp 17 | import System.FilePath 18 | import System.Process.ListLike 19 | import Text.Printf 20 | import Data.String.Builder 21 | import Control.Monad 22 | import qualified Data.Text as T 23 | import qualified Data.Text.IO as TIO 24 | import Control.Exception 25 | import System.IO.Error (isDoesNotExistError) 26 | import Data.Maybe 27 | import Control.Monad.IO.Class 28 | import Control.Monad.Writer 29 | import Control.Monad.State 30 | 31 | newtype ImpTest = ImpTest FilePath 32 | deriving Typeable 33 | 34 | newtype AcceptTests = AcceptTests Bool 35 | deriving (Eq, Ord, Typeable) 36 | 37 | instance IsOption AcceptTests where 38 | defaultValue = AcceptTests False 39 | parseValue = fmap AcceptTests . safeReadBool 40 | optionName = pure "imp-accept" 41 | optionHelp = pure "Accept current test outputs for IMP tests" 42 | optionCLParser = flagCLParser Nothing (AcceptTests True) 43 | 44 | newtype TestsDirectory = TestsDirectory String 45 | deriving (Eq, Ord, Typeable) 46 | 47 | instance IsOption TestsDirectory where 48 | defaultValue = TestsDirectory "" 49 | parseValue = Just . TestsDirectory 50 | optionName = pure "imp-tests-dir" 51 | optionHelp = pure "Root directory for IMP test files" 52 | 53 | readFileIfExists :: FilePath -> IO (Maybe T.Text) 54 | readFileIfExists name = do 55 | mbBs <- try $ TIO.readFile name 56 | case mbBs of 57 | Left e | isDoesNotExistError e -> pure Nothing 58 | | otherwise -> throwIO e 59 | Right bs -> pure $ Just bs 60 | 61 | instance IsTest ImpTest where 62 | testOptions = 63 | pure [ Option (Proxy :: Proxy AcceptTests) 64 | , Option (Proxy :: Proxy TestsDirectory) 65 | ] 66 | 67 | run opts (ImpTest name) _ = 68 | withSystemTempDirectory name $ \tmpDir -> do 69 | let outputFileName = tmpDir name 70 | args = ["-o", outputFileName, sourceFileName] 71 | (r, stdOut, stdErr) <- readCreateProcessWithExitCode (proc "impc" args) "" 72 | 73 | case r of 74 | ExitSuccess -> 75 | runExecutable outputFileName 76 | ExitFailure code -> pure $ testFailed $ build $ do 77 | literal $ printf "Compilation failed with exit code %d" code 78 | 79 | unless (null stdOut) $ do 80 | "\nCompiler output:\n" 81 | literal stdOut 82 | 83 | unless (null stdErr) $ do 84 | "\nCompiler error output:\n" 85 | literal stdErr 86 | where 87 | AcceptTests accept = lookupOption opts 88 | TestsDirectory dir = lookupOption opts 89 | sourceBaseName = dir name 90 | sourceFileName = sourceBaseName <.> "imp" 91 | stdInFileName = sourceBaseName <.> "stdin" 92 | stdOutFileName = sourceBaseName <.> "stdout" 93 | stdErrFileName = sourceBaseName <.> "stderr" 94 | 95 | runExecutable :: FilePath -> IO Result 96 | runExecutable exeFileName = do 97 | input <- readFileIfExists stdInFileName 98 | expStdOut <- readFileIfExists stdOutFileName 99 | expStdErr <- readFileIfExists stdErrFileName 100 | (r, stdOut, stdErr) <- readCreateProcessWithExitCode (proc exeFileName []) (fromMaybe "" input) 101 | 102 | case r of 103 | ExitSuccess -> do 104 | (strs, ok) <- flip runStateT True $ execWriterT $ do 105 | compareOutput "stdout" stdOutFileName expStdOut stdOut 106 | compareOutput "stderr" stdErrFileName expStdErr stdErr 107 | let func = if ok then testPassed else testFailed 108 | pure $ func $ unlines strs 109 | ExitFailure code -> pure $ testFailed $ printf "Execution failed with exit code %d" code 110 | 111 | compareOutput :: String -> FilePath -> Maybe T.Text -> T.Text -> (WriterT [String] (StateT Bool IO)) () 112 | compareOutput _ _ Nothing _ = pure () 113 | 114 | compareOutput label fileName (Just expected) actual = 115 | if expected /= actual 116 | then 117 | if accept 118 | then do 119 | liftIO $ TIO.writeFile fileName actual 120 | tell [ printf "Accepted new verison (%s)" label ] 121 | else do 122 | tell [ printf "Output differs (%s)" label ] 123 | put False 124 | else pure () 125 | 126 | testCase :: TestName -> TestTree 127 | testCase name = singleTest name $ ImpTest name 128 | 129 | testSubDir :: TestName -> String -> [TestTree] -> TestTree 130 | testSubDir name subDir = adjustOption (\(TestsDirectory dir) -> TestsDirectory $ dir subDir) . testGroup name 131 | -------------------------------------------------------------------------------- /package.yaml: -------------------------------------------------------------------------------- 1 | name: imp 2 | version: 0.1.0.0 3 | synopsis: Compiler for IMP programming language 4 | description: 5 | This is a compiler for IMP programming language 6 | implemented in Haskell. It uses LLVM as compiler 7 | backend. 8 | category: Compiler 9 | author: Ievgenii Meshcheriakov 10 | copyright: 2017-2018 Ievgenii Meshcheriakov 11 | license: MIT 12 | license-file: LICENSE 13 | build-type: Simple 14 | github: eugmes/imp 15 | 16 | data-files: 17 | - stdlib/impstd.c 18 | - stdlib/impstd.h 19 | 20 | ghc-options: 21 | - -Wall 22 | - -fno-warn-name-shadowing 23 | - -Wcompat 24 | 25 | library: 26 | source-dirs: lib 27 | dependencies: 28 | - base >= 4.7 && < 5 29 | - megaparsec == 8.* 30 | - text 31 | - llvm-hs-pure == 9.* 32 | - containers 33 | - mtl 34 | - bytestring 35 | - utf8-string 36 | - composition-extra 37 | - text-show 38 | - tasty 39 | - temporary 40 | - process-extras 41 | - filepath 42 | - stringbuilder 43 | 44 | executables: 45 | impc: 46 | source-dirs: src 47 | main: Main.hs 48 | dependencies: 49 | - base 50 | - imp 51 | - megaparsec == 8.* 52 | - text 53 | - llvm-hs == 9.* 54 | - bytestring 55 | - optparse-applicative 56 | - containers 57 | - temporary 58 | - filepath 59 | - process 60 | 61 | tests: 62 | testsuite: 63 | source-dirs: test 64 | main: TestSuite.hs 65 | dependencies: 66 | - base 67 | - imp 68 | - megaparsec == 8.* 69 | - tasty 70 | - tasty-hunit 71 | - text 72 | -------------------------------------------------------------------------------- /src/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main (main) where 4 | 5 | import IMP.Parser 6 | import IMP.Codegen.Error 7 | import IMP.Emit 8 | import IMP.AST (Program) 9 | import qualified Data.Text.IO as TIO 10 | import qualified Text.Megaparsec as P 11 | import LLVM 12 | import LLVM.Context 13 | import LLVM.PassManager as PM 14 | import LLVM.Analysis 15 | import LLVM.Exception 16 | import LLVM.Target 17 | import qualified LLVM.Relocation as Reloc 18 | import qualified LLVM.CodeModel as CodeModel 19 | import qualified LLVM.CodeGenOpt as CodeGenOpt 20 | import qualified Data.ByteString.Char8 as C 21 | import LLVM.CommandLine 22 | import Options.Applicative as Opt 23 | import System.Exit 24 | import Control.Exception 25 | import Control.Monad 26 | import Data.String 27 | import qualified Data.Map as Map 28 | import System.Environment (getProgName, lookupEnv) 29 | import System.IO.Temp (withSystemTempDirectory) 30 | import System.FilePath 31 | import System.Process 32 | import Data.Maybe 33 | import Paths_imp (getDataFileName) 34 | 35 | getStdLibrarySource :: IO FilePath 36 | getStdLibrarySource = getDataFileName $ "stdlib" "impstd.c" 37 | 38 | getDefaultCCompiler :: IO FilePath 39 | getDefaultCCompiler = fromMaybe "cc" <$> lookupEnv "CC" 40 | 41 | data Stage = ParseStage 42 | | AssemblyStage 43 | | ObjectStage 44 | | LinkStage 45 | deriving Show 46 | 47 | data OutputKind = NativeOutput 48 | | LLVMOutput 49 | deriving Show 50 | 51 | data Options = Options 52 | { inputFile :: FilePath 53 | , lastStage :: Stage 54 | , outputKind :: OutputKind 55 | , outputFile :: Maybe FilePath 56 | , optimizationLevel :: Maybe Word 57 | , triple :: Maybe String 58 | , cpu :: Maybe String 59 | , llvmOptions :: [String] 60 | , cc :: Maybe FilePath 61 | } deriving Show 62 | 63 | getDefaultOutputExt :: Options -> String 64 | getDefaultOutputExt o = 65 | case (lastStage o, outputKind o) of 66 | (ParseStage, _) -> "tree" 67 | (AssemblyStage, NativeOutput) -> "s" 68 | (AssemblyStage, LLVMOutput) -> "ll" 69 | (ObjectStage, NativeOutput) -> "o" 70 | (ObjectStage, LLVMOutput) -> "bc" 71 | (LinkStage, _) -> "" 72 | 73 | makeOutputFileName :: Options -> Maybe FilePath 74 | makeOutputFileName o = 75 | case outputFile o of 76 | Just "-" -> Nothing 77 | Just name -> Just name 78 | Nothing -> Just $ takeBaseName (inputFile o) <.> getDefaultOutputExt o 79 | 80 | options :: Opt.Parser Options 81 | options = Options 82 | <$> strArgument (metavar "FILE" <> help "Source file name") 83 | <*> ( flag' ParseStage (long "parse-only" <> help "Stop after parsing and dump the parse tree") 84 | <|> flag' AssemblyStage (short 'S' <> help "Emit assembly") 85 | <|> flag' ObjectStage (short 'c' <> help "Emit object code/bitcode") 86 | <|> pure LinkStage ) 87 | <*> ( flag' LLVMOutput (long "emit-llvm" <> help "Emit LLVM assembly code/bitcode") 88 | <|> pure NativeOutput ) 89 | <*> optional (option str (short 'o' <> metavar "FILE" <> help "Redirect output to FILE")) 90 | <*> optional (option auto (short 'O' <> metavar "LEVEL" <> help "Set optimization level")) 91 | <*> optional (option str (long "triple" <> metavar "TRIPLE" <> help "Target triple for code generation")) 92 | <*> optional (option str (long "cpu" <> metavar "CPU" <> help "Target a specific CPU type")) 93 | <*> many (option str (long "llvm" <> metavar "OPTION" <> help "Additional options to pass to LLVM")) 94 | <*> optional (option str (long "cc" <> metavar "PATH" <> help "Use specified C compiler for linking")) 95 | 96 | withTargetFromOptions :: Options -> (TargetMachine -> IO a) -> IO a 97 | withTargetFromOptions o f = do 98 | initializeAllTargets 99 | triple <- case triple o of 100 | Nothing -> getDefaultTargetTriple 101 | Just t -> return $ fromString t 102 | (target, tname) <- lookupTarget Nothing triple 103 | let cpuName = maybe "" fromString $ cpu o 104 | features = Map.empty 105 | withTargetOptions $ \options -> 106 | withTargetMachine target tname cpuName features options Reloc.Default CodeModel.Default CodeGenOpt.Default f 107 | 108 | setLLVMCommandLineOptions :: [String] -> IO () 109 | setLLVMCommandLineOptions [] = return () 110 | setLLVMCommandLineOptions opts = do 111 | prog <- getProgName 112 | let args = map fromString $ prog : opts 113 | parseCommandLineOptions args Nothing 114 | 115 | -- TODO: Use some type class for this 116 | showingErrorsBy :: Either e a -> (e -> String) -> IO a 117 | showingErrorsBy v handler = 118 | case v of 119 | Left err -> do 120 | putStrLn $ handler err 121 | exitFailure 122 | Right r -> 123 | return r 124 | 125 | genCode :: Options -> FilePath -> Program -> IO () 126 | genCode o name pgm = do 127 | setLLVMCommandLineOptions $ llvmOptions o 128 | withContext $ \context -> 129 | withTargetFromOptions o $ \target -> do 130 | dataLayout <- getTargetMachineDataLayout target 131 | targetTriple <- getTargetMachineTriple target 132 | let opts = CodegenOptions name dataLayout targetTriple 133 | 134 | ast <- compileProgram opts pgm `showingErrorsBy` locatedErrorPretty 135 | withModuleFromAST context ast $ \m -> do 136 | verify m 137 | let passes = defaultCuratedPassSetSpec 138 | { PM.optLevel = optimizationLevel o 139 | , PM.dataLayout = Just dataLayout 140 | , PM.targetMachine = Just target 141 | } 142 | withPassManager passes $ \pm -> do 143 | void $ runPassManager pm m 144 | llstr <- case (lastStage o, outputKind o) of 145 | (AssemblyStage, NativeOutput) -> moduleTargetAssembly target m 146 | (AssemblyStage, LLVMOutput) -> moduleLLVMAssembly m 147 | (ObjectStage, NativeOutput) -> moduleObject target m 148 | (ObjectStage, LLVMOutput) -> moduleBitcode m 149 | _ -> error "Unexpected stage" 150 | emitOutput llstr 151 | where 152 | emitOutput = maybe C.putStr C.writeFile $ makeOutputFileName o 153 | 154 | outputTree :: Options -> Program -> IO () 155 | outputTree o tree = writeOutput $ show tree <> "\n" 156 | where 157 | writeOutput = maybe putStr writeFile $ makeOutputFileName o 158 | 159 | linkProgram :: Options -> FilePath -> FilePath -> IO () 160 | linkProgram o objectFileName outputFileName = do 161 | stdLibFile <- getStdLibrarySource 162 | cc <- maybe getDefaultCCompiler pure $ cc o 163 | let optOption = "-O" <> maybe "2" show (optimizationLevel o) 164 | compilerArgs = [ "-o", outputFileName 165 | , objectFileName, stdLibFile 166 | , optOption 167 | , "-no-pie" ] -- Needed when using GCC in some environments 168 | callProcess cc compilerArgs 169 | 170 | run :: Options -> IO () 171 | run o = do 172 | let fileName = inputFile o 173 | text <- TIO.readFile fileName 174 | tree <- P.runParser parser fileName text `showingErrorsBy` P.errorBundlePretty 175 | let runGenCode opts = genCode opts fileName tree `catch` \(VerifyException msg) -> do 176 | putStrLn "Verification exception:" 177 | putStrLn msg 178 | exitFailure 179 | case lastStage o of 180 | ParseStage -> outputTree o tree 181 | LinkStage -> 182 | case makeOutputFileName o of 183 | Nothing -> do 184 | putStrLn "Refusing to produce executable on standard output." 185 | exitFailure 186 | Just outputFileName -> 187 | withSystemTempDirectory "imp" $ \dir -> do 188 | let tmpExt = getDefaultOutputExt $ o { lastStage = ObjectStage } 189 | tmpFileName = dir takeBaseName fileName <.> tmpExt 190 | opts = o { lastStage = ObjectStage, outputFile = Just tmpFileName } 191 | runGenCode opts 192 | linkProgram o tmpFileName outputFileName 193 | _ -> runGenCode o 194 | 195 | main :: IO () 196 | main = execParser opts >>= run 197 | where 198 | opts = info (options <**> helper) 199 | ( progDesc "Compiles IMP programs" ) 200 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-15.15 2 | packages: 3 | - '.' 4 | extra-deps: 5 | - llvm-hs-9.0.1 6 | - llvm-hs-pure-9.0.0 7 | -------------------------------------------------------------------------------- /stdlib/impstd.c: -------------------------------------------------------------------------------- 1 | #include "impstd.h" 2 | #include 3 | #include 4 | #include 5 | 6 | int32_t _IMP_input_integer(void) 7 | { 8 | int32_t val; 9 | int ret = fscanf(stdin, "%"PRId32, &val); 10 | if (ret != 1) { 11 | fprintf(stderr, "IMP ERROR: input failed\n"); 12 | exit(EXIT_FAILURE); 13 | } 14 | 15 | return val; 16 | } 17 | 18 | void _IMP_output_integer(int32_t val) 19 | { 20 | printf("%"PRId32, val); 21 | } 22 | 23 | void _IMP_output_boolean(bool val) 24 | { 25 | printf("%s", val ? "true" : "false"); 26 | } 27 | 28 | void _IMP_output_string(const char *s) 29 | { 30 | printf("%s", s); 31 | } 32 | 33 | void _IMP_halt(void) 34 | { 35 | exit(0); 36 | } 37 | 38 | void _IMP_newline(void) 39 | { 40 | printf("\n"); 41 | } 42 | 43 | void _IMP_constraint_error_ex(const char *file_name, int32_t line_no) 44 | { 45 | /* TODO: Add error description. */ 46 | fprintf(stderr, "raised CONSTRAINT_ERROR : %s:%"PRId32"\n", file_name, line_no); 47 | exit(EXIT_FAILURE); 48 | } 49 | 50 | void _IMP_program_error_ex(const char *file_name, int32_t line_no) 51 | { 52 | /* TODO: Add error description. */ 53 | fprintf(stderr, "raised PROGRAM_ERROR : %s:%"PRId32"\n", file_name, line_no); 54 | exit(EXIT_FAILURE); 55 | } 56 | -------------------------------------------------------------------------------- /stdlib/impstd.h: -------------------------------------------------------------------------------- 1 | #ifndef IMPSTD_H 2 | #define IMPSTD_H 3 | #include 4 | #include 5 | 6 | int32_t _IMP_input_integer(void); 7 | void _IMP_output_integer(int32_t); 8 | void _IMP_output_boolean(bool); 9 | void _IMP_output_string(const char *); 10 | void _IMP_halt(void); 11 | void _IMP_newline(void); 12 | void _IMP_constraint_error_ex(const char *file_name, int32_t line_no) __attribute__((noreturn)); 13 | void _IMP_program_error_ex(const char *file_name, int32_t line_no) __attribute__((noreturn)); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /test/CompilerTests.hs: -------------------------------------------------------------------------------- 1 | module CompilerTests (tests) where 2 | 3 | import Test.Tasty 4 | import Test.Tasty.IMP 5 | 6 | tests :: TestTree 7 | tests = testSubDir "Example Files" "test/examples" 8 | [ testCase "simple" 9 | , testCase "hello" 10 | , testCase "global" 11 | , testCase "fibi" 12 | , testCase "fibi_no_rec" 13 | , testCase "sum" 14 | , testCase "gcd" 15 | , testCase "test_inout" 16 | , testCase "test_sub_input" 17 | -- TODO add integer_lit_overflow 18 | ] 19 | -------------------------------------------------------------------------------- /test/IMP/Test/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module IMP.Test.Parser (tests) where 3 | 4 | import Test.Tasty 5 | import Test.Tasty.HUnit 6 | 7 | import IMP.Parser 8 | import IMP.SourceLoc 9 | import IMP.AST 10 | 11 | import Data.Text (Text, unpack) 12 | import Text.Megaparsec 13 | import Data.Char 14 | import Text.Printf 15 | 16 | -- | Escape invalid characters in string tst cases so they lookg better in the 17 | -- test report. 18 | escapeString :: Text -> String 19 | escapeString = concatMap escape . unpack 20 | where 21 | escape c 22 | | isPrint c = [c] 23 | | otherwise = printf "#{%02X}" $ fromEnum c 24 | 25 | validParseTest :: (Eq b, Show b) => Parser a -> (a -> b) -> Text -> b -> TestTree 26 | validParseTest p f input expected = testCase name $ Just expected @=? actual 27 | where 28 | name = "for " <> escapeString input 29 | actual = f <$> parseMaybe p input 30 | 31 | invalidParseTest :: (Eq a, Show a) => Parser a -> Text -> TestTree 32 | invalidParseTest p input = testCase name $ Nothing @=? actual 33 | where 34 | name = "for " <> escapeString input 35 | actual = parseMaybe p input 36 | 37 | stringLiteralTests :: [(Text, Text)] 38 | stringLiteralTests = 39 | [ ("\"Message of the day:\"", "Message of the day:") 40 | , ("\"\"", "") 41 | , ("\" \"", " ") 42 | , ("\"A\"", "A") 43 | , ("\"\"\"\"", "\"") 44 | , ("\"Characters such as $, %, and } are allowed in string literals\"", 45 | "Characters such as $, %, and } are allowed in string literals") 46 | , ("\"Archimedes said \"\"Εύρηκα\"\"\"", 47 | "Archimedes said \"Εύρηκα\"") 48 | , ("\"Volume of cylinder (πr²h) = \"", 49 | "Volume of cylinder (πr²h) = ") 50 | ] 51 | 52 | invalidStringLiteralTests :: [Text] 53 | invalidStringLiteralTests = 54 | [ "\"unterminated string" 55 | , "\"string with embedded newline\n\"" 56 | , "\"string with tab\tcharacter\"" 57 | ] 58 | 59 | stringTests :: [TestTree] 60 | stringTests = map f stringLiteralTests ++ map f' invalidStringLiteralTests 61 | where 62 | f (i, r) = validParseTest stringLiteral unLoc i r 63 | f' = invalidParseTest stringLiteral 64 | 65 | validIdentifierTests :: [(Text, Text)] 66 | validIdentifierTests = 67 | [ ("Count", "count") 68 | , ("X", "x") 69 | , ("Get_Symbol", "get_symbol") 70 | , ("Ethelyn", "ethelyn") 71 | , ("Marion", "marion") 72 | , ("Snobol_4", "snobol_4") 73 | , ("X1", "x1") 74 | , ("Page_Count", "page_count") 75 | , ("Store_Next_Item", "store_next_item") 76 | , ("Πλάτω", "πλάτω") 77 | , ("Чайковский", "чайковский") 78 | , ("θ", "θ") 79 | , ("φ", "φ") 80 | ] 81 | 82 | invalidIdentifierTests :: [Text] 83 | invalidIdentifierTests = [ "123", "begin", "two__underscores", "ends_with_undercore_" ] 84 | 85 | identifierTests :: [TestTree] 86 | identifierTests = map f validIdentifierTests ++ map f' invalidIdentifierTests 87 | where 88 | f (i, r) = validParseTest identifier (getID . unLoc) i r 89 | f' = invalidParseTest identifier 90 | 91 | tests :: TestTree 92 | tests = testGroup "Parser" 93 | [ testGroup "stringLiteral" stringTests 94 | , testGroup "identifier" identifierTests 95 | ] 96 | -------------------------------------------------------------------------------- /test/IMP/Test/Tests.hs: -------------------------------------------------------------------------------- 1 | module IMP.Test.Tests (tests) where 2 | 3 | import Test.Tasty 4 | 5 | import qualified IMP.Test.Parser as Parser 6 | 7 | tests :: TestTree 8 | tests = testGroup "IMP" 9 | [ Parser.tests 10 | ] 11 | -------------------------------------------------------------------------------- /test/TestSuite.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Test.Tasty 4 | import qualified IMP.Test.Tests as IMP 5 | import qualified CompilerTests 6 | 7 | main :: IO () 8 | main = defaultMain $ testGroup "Tests" [ 9 | IMP.tests, 10 | CompilerTests.tests 11 | ] 12 | -------------------------------------------------------------------------------- /test/examples/fibi.imp: -------------------------------------------------------------------------------- 1 | function fibi(n : integer) return integer is 2 | begin 3 | if n <= 2 then 4 | return 1; 5 | else 6 | return fibi(n-2) + fibi(n - 1); 7 | end if; 8 | end fibi; 9 | 10 | procedure main () is 11 | var n : integer; 12 | begin 13 | output("Enter number: "); input(n); 14 | output("Result is: "); output(fibi(n)); 15 | newline; 16 | end main; 17 | -------------------------------------------------------------------------------- /test/examples/fibi.stdin: -------------------------------------------------------------------------------- 1 | 20 2 | 3 | -------------------------------------------------------------------------------- /test/examples/fibi.stdout: -------------------------------------------------------------------------------- 1 | Enter number: Result is: 6765 2 | -------------------------------------------------------------------------------- /test/examples/fibi_no_rec.imp: -------------------------------------------------------------------------------- 1 | function fibi(x : integer) return integer is 2 | var a, b, c : integer; 3 | var i : integer; 4 | begin 5 | a = 1; 6 | b = 1; 7 | c = 0; 8 | i = 3; 9 | 10 | while i <= x loop 11 | c = a + b; 12 | a = b; 13 | b = c; 14 | i = i + 1; 15 | end loop; 16 | 17 | return b; 18 | end fibi; 19 | 20 | procedure main () is 21 | var n : integer; 22 | begin 23 | output("Enter number: "); input(n); 24 | output("Result is: "); output(fibi(n)); 25 | newline; 26 | end main; 27 | -------------------------------------------------------------------------------- /test/examples/fibi_no_rec.stdin: -------------------------------------------------------------------------------- 1 | 46 2 | -------------------------------------------------------------------------------- /test/examples/fibi_no_rec.stdout: -------------------------------------------------------------------------------- 1 | Enter number: Result is: 1836311903 2 | -------------------------------------------------------------------------------- /test/examples/gcd.imp: -------------------------------------------------------------------------------- 1 | function is_even (a : integer) return boolean is 2 | begin 3 | return a % 2 == 0; 4 | end is_even; 5 | 6 | function gcd (a, b : integer) return integer is 7 | var v_a, v_b, d : integer; 8 | begin 9 | v_a = a; v_b = b; 10 | d = 1; 11 | 12 | while is_even (v_a) and is_even(v_b) loop 13 | v_a = v_a / 2; 14 | v_b = v_b / 2; 15 | d = d * 2; 16 | end loop; 17 | 18 | while v_a # v_b loop 19 | if is_even (v_a) then 20 | v_a = v_a / 2; 21 | elsif is_even (v_b) then 22 | v_b = v_b / 2; 23 | elsif v_a > v_b then 24 | v_a = (v_a - v_b) / 2; 25 | else 26 | v_b = (v_b - v_a) / 2; 27 | end if; 28 | end loop; 29 | 30 | return v_a * d; 31 | end gcd; 32 | 33 | procedure main () is 34 | var a, b, c : integer; 35 | begin 36 | output ("Firsr number: "); input (a); 37 | output ("Second number: "); input (b); 38 | c = gcd (a, b); 39 | output (c); newline; 40 | end main; 41 | -------------------------------------------------------------------------------- /test/examples/gcd.stdin: -------------------------------------------------------------------------------- 1 | 4200 2 | 3528 3 | -------------------------------------------------------------------------------- /test/examples/gcd.stdout: -------------------------------------------------------------------------------- 1 | Firsr number: Second number: 168 2 | -------------------------------------------------------------------------------- /test/examples/global.imp: -------------------------------------------------------------------------------- 1 | var g : integer; 2 | -- Unused variable for testing 3 | var x : boolean; 4 | 5 | procedure main () is 6 | begin 7 | output("Enter number: "); 8 | input(g); 9 | output("Number is: "); output(g); newline; 10 | end main; 11 | -------------------------------------------------------------------------------- /test/examples/global.stdin: -------------------------------------------------------------------------------- 1 | 42 2 | -------------------------------------------------------------------------------- /test/examples/global.stdout: -------------------------------------------------------------------------------- 1 | Enter number: Number is: 42 2 | -------------------------------------------------------------------------------- /test/examples/hello.imp: -------------------------------------------------------------------------------- 1 | procedure main () is 2 | begin 3 | Output("Hello World!"); 4 | NewLine; 5 | end main; 6 | -------------------------------------------------------------------------------- /test/examples/hello.stdout: -------------------------------------------------------------------------------- 1 | Hello World! 2 | -------------------------------------------------------------------------------- /test/examples/integer_lit_overflow.imp: -------------------------------------------------------------------------------- 1 | procedure main () is 2 | begin 3 | output(42); newline; 4 | -- This should trigger exception 5 | output(2147483648); newline; 6 | end main; 7 | -------------------------------------------------------------------------------- /test/examples/simple.imp: -------------------------------------------------------------------------------- 1 | procedure main() is 2 | begin 3 | end main; 4 | -------------------------------------------------------------------------------- /test/examples/simple.stdout: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugmes/imp/7bee4dcff146e041b0242b9e7054dfec4c04c84f/test/examples/simple.stdout -------------------------------------------------------------------------------- /test/examples/sum.imp: -------------------------------------------------------------------------------- 1 | function sum(x, y : integer) return integer is 2 | var s : integer; 3 | begin 4 | s = x + y; 5 | return s; 6 | end sum; 7 | 8 | procedure main () is 9 | var a, b : integer; 10 | begin 11 | output("Enter 2 integers: "); 12 | input(a); input(b); 13 | output("The sum is: "); 14 | output(sum(a, b)); 15 | newline; 16 | end main; 17 | -------------------------------------------------------------------------------- /test/examples/sum.stdin: -------------------------------------------------------------------------------- 1 | 123 2 | -10 3 | -------------------------------------------------------------------------------- /test/examples/sum.stdout: -------------------------------------------------------------------------------- 1 | Enter 2 integers: The sum is: 113 2 | -------------------------------------------------------------------------------- /test/examples/test_inout.imp: -------------------------------------------------------------------------------- 1 | procedure proc (a, b : out integer) is 2 | begin 3 | a = 42; 4 | b = 43; 5 | end proc; 6 | 7 | procedure proc1 (a, b : in out integer) is 8 | begin 9 | a = a + 42; 10 | b = b + 43; 11 | end proc1; 12 | 13 | procedure main () is 14 | var x, y : integer; 15 | begin 16 | call proc (x, y); 17 | output(x); newline; 18 | output(y); newline; 19 | call proc1 (x, y); 20 | output(x); newline; 21 | output(y); newline; 22 | end main; 23 | -------------------------------------------------------------------------------- /test/examples/test_inout.stdout: -------------------------------------------------------------------------------- 1 | 42 2 | 43 3 | 84 4 | 86 5 | -------------------------------------------------------------------------------- /test/examples/test_sub_input.imp: -------------------------------------------------------------------------------- 1 | procedure sub (a : out integer) is 2 | begin 3 | output("Enter integer: "); 4 | input(a); 5 | end sub; 6 | 7 | procedure main () is 8 | var a : integer; 9 | begin 10 | a = 10; 11 | call sub(a); 12 | output(a); newline; 13 | end main; 14 | -------------------------------------------------------------------------------- /test/examples/test_sub_input.stdin: -------------------------------------------------------------------------------- 1 | 45 2 | -------------------------------------------------------------------------------- /test/examples/test_sub_input.stdout: -------------------------------------------------------------------------------- 1 | Enter integer: 45 2 | --------------------------------------------------------------------------------