├── .dockerignore ├── Setup.hs ├── Makefile ├── src ├── Utils.hs ├── ExtraParsers.hs ├── Grammar.hs ├── KeywordParse.hs ├── Main.hs ├── Intermediate.hs ├── Paskell.hs ├── ConvertIR.hs ├── TypeCheck.hs ├── Emit.hs └── Codegen.hs ├── .gitignore ├── Dockerfile ├── .travis.yml ├── LICENSE ├── stack.yaml ├── paskell.cabal ├── README.md └── test └── Test.hs /.dockerignore: -------------------------------------------------------------------------------- 1 | .dockerignore 2 | Dockerfile 3 | Makefile -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | docker build -t paskell . 3 | 4 | test: build 5 | docker run paskell stack test --ghc-options=-Werror 6 | 7 | bash: build 8 | docker run -it paskell bash 9 | -------------------------------------------------------------------------------- /src/Utils.hs: -------------------------------------------------------------------------------- 1 | module Utils where 2 | 3 | import Text.Parsec 4 | import Text.Parsec.String 5 | 6 | p' :: Parser a -> String -> Either ParseError a 7 | p' p = parse p "" 8 | 9 | enumerate = zip [0..] 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-* 3 | cabal-dev 4 | *.o 5 | *.hi 6 | *.chi 7 | *.chs.h 8 | *.dyn_o 9 | *.dyn_hi 10 | .hpc 11 | .hsenv 12 | .cabal-sandbox/ 13 | cabal.sandbox.config 14 | *.prof 15 | *.aux 16 | *.hp 17 | *.eventlog 18 | .stack-work/ 19 | cabal.project.local 20 | cabal.project.local~ 21 | .HTF/ 22 | .ghc.environment.* 23 | pascal-src/ 24 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y curl llvm-5.0 llvm-5.0-dev 5 | RUN curl -sSL https://get.haskellstack.org/ | sh 6 | 7 | COPY Setup.hs /Paskell/Setup.hs 8 | COPY stack.yaml /Paskell/stack.yaml 9 | COPY paskell.cabal /Paskell/paskell.cabal 10 | WORKDIR /Paskell 11 | ENV PATH="/usr/lib/llvm-5.0/bin:$PATH" 12 | ENV PATH="$HOME/.local/bin:$PATH" 13 | RUN stack setup && stack build --only-dependencies 14 | # cache for steps above won't be invalidated upon changes to source files 15 | 16 | COPY . /Paskell 17 | RUN mkdir /Paskell/build 18 | ENV PATH="/Paskell/build:$PATH" 19 | RUN stack build --copy-bins --local-bin-path /Paskell/build 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: false 3 | 4 | before_install: 5 | - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - 6 | - sudo apt-add-repository "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-5.0 main" 7 | - sudo apt-get update 8 | - sudo apt-get install -y llvm-5.0 llvm-5.0-dev 9 | - mkdir -p ~/.local/bin 10 | - export PATH=$HOME/.local/bin:$PATH 11 | - travis_retry curl -L https://www.stackage.org/stack/linux-x86_64 | tar xz --wildcards --strip-components=1 -C ~/.local/bin '*/stack' 12 | 13 | install: 14 | - stack setup 15 | 16 | script: 17 | - stack test --ghc-options=-Werror 18 | 19 | cache: 20 | directories: 21 | - $HOME/.stack 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, sam46 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of sam46 nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /src/ExtraParsers.hs: -------------------------------------------------------------------------------- 1 | module ExtraParsers where 2 | 3 | import Text.Parsec 4 | import Text.Parsec.String 5 | import Text.Parsec.Combinator 6 | import Data.Char 7 | import Utils (p') 8 | 9 | comments :: Parser () 10 | comments = spaces -- todo change to actual comment parser 11 | 12 | whitespace :: Parser () 13 | whitespace = spaces >> comments 14 | 15 | charIgnoreCase :: Char -> Parser Char 16 | charIgnoreCase c = char (toUpper c) <|> char (toLower c) 17 | 18 | stringIgnoreCase :: String -> Parser String 19 | stringIgnoreCase [] = return [] 20 | stringIgnoreCase (x:xs) = (:) <$> charIgnoreCase x <*> stringIgnoreCase xs 21 | 22 | tok :: Parser a -> Parser a 23 | tok = (<* whitespace) 24 | 25 | charTok :: Char -> Parser Char 26 | charTok = tok . char 27 | 28 | charIgnoreCaseTok :: Char -> Parser Char 29 | charIgnoreCaseTok c = charTok (toUpper c) <|> charTok (toLower c) 30 | 31 | commaTok :: Parser Char 32 | commaTok = charTok ',' 33 | 34 | semicolTok :: Parser Char 35 | semicolTok = charTok ';' 36 | 37 | -- charsIgnoreCaseTok :: String -> Parser String 38 | -- charsIgnoreCaseTok = tok . charsIgnoreCase 39 | 40 | stringTok :: String -> Parser String 41 | stringTok = tok . string -- Doesn't parse exact words! e.g. parse (stringTok "ab") "abc" correctly consumes "ab" 42 | 43 | betweenCharTok :: Char -> Char -> Parser a -> Parser a 44 | betweenCharTok c1 c2 p = between (charTok c1) (charTok c2) p 45 | 46 | betweenSepbyComma :: Char -> Char -> Parser a -> Parser [a] 47 | betweenSepbyComma c1 c2 p = betweenCharTok c1 c2 $ 48 | sepBy p (charTok ',') 49 | 50 | exactTok :: String -> Parser String -- case is ignored! 51 | exactTok s = tok . try $ 52 | do { stringIgnoreCase s; notFollowedBy alphaNum; return s } 53 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by 'stack init' 2 | # 3 | # Some commonly used options have been documented as comments in this file. 4 | # For advanced use and comprehensive documentation of the format, please see: 5 | # https://docs.haskellstack.org/en/stable/yaml_configuration/ 6 | 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version. 8 | # A snapshot resolver dictates the compiler version and the set of packages 9 | # to be used for project dependencies. For example: 10 | # 11 | # resolver: lts-3.5 12 | # resolver: nightly-2015-09-21 13 | # resolver: ghc-7.10.2 14 | # resolver: ghcjs-0.1.0_ghc-7.10.2 15 | # 16 | # The location of a snapshot can be provided as a file or url. Stack assumes 17 | # a snapshot provided as a file might change, whereas a url resource does not. 18 | # 19 | # resolver: ./custom-snapshot.yaml 20 | # resolver: https://example.com/snapshots/2018-01-01.yaml 21 | resolver: lts-11.22 22 | 23 | # User packages to be built. 24 | # Various formats can be used as shown in the example below. 25 | # 26 | # packages: 27 | # - some-directory 28 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz 29 | # - location: 30 | # git: https://github.com/commercialhaskell/stack.git 31 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a 32 | # - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a 33 | # subdirs: 34 | # - auto-update 35 | # - wai 36 | packages: 37 | - . 38 | # Dependency packages to be pulled from upstream that are not in the resolver 39 | # using the same syntax as the packages field. 40 | # (e.g., acme-missiles-0.3) 41 | # extra-deps: [] 42 | 43 | # Override default flag values for local packages and extra-deps 44 | # flags: {} 45 | 46 | # Extra package databases containing global packages 47 | # extra-package-dbs: [] 48 | 49 | # Control whether we use the GHC we find on the path 50 | # system-ghc: true 51 | # 52 | # Require a specific version of stack, using version ranges 53 | # require-stack-version: -any # Default 54 | # require-stack-version: ">=1.7" 55 | # 56 | # Override the architecture used by stack, especially useful on Windows 57 | # arch: i386 58 | # arch: x86_64 59 | # 60 | # Extra directories used by stack for building 61 | # extra-include-dirs: [/path/to/dir] 62 | # extra-lib-dirs: [/path/to/dir] 63 | # 64 | # Allow a newer minor version of GHC than the snapshot specifies 65 | # compiler-check: newer-minor -------------------------------------------------------------------------------- /paskell.cabal: -------------------------------------------------------------------------------- 1 | -- Initial paskell.cabal generated by cabal init. For further 2 | -- documentation, see http://haskell.org/cabal/users-guide/ 3 | 4 | name: paskell 5 | version: 0.1.0.0 6 | synopsis: A Pascal compiler with llvm backend 7 | -- description: 8 | homepage: https://github.com/sam46/Paskell 9 | license: BSD3 10 | license-file: LICENSE 11 | author: sam46 12 | maintainer: abdlwahdsa@gmail.com 13 | -- copyright: 14 | category: Development 15 | build-type: Simple 16 | extra-source-files: ChangeLog.md, README.md 17 | cabal-version: >=1.10 18 | 19 | executable paskell 20 | main-is: Main.hs 21 | -- other-modules: 22 | -- other-extensions: 23 | build-depends: base >=4.9 && <= 4.10.1.0, 24 | parsec, 25 | mtl, 26 | text, 27 | HUnit, 28 | haskeline, 29 | bytestring, 30 | containers, 31 | llvm-hs == 5.1.3, 32 | llvm-hs-pure == 5.1.2, 33 | process 34 | 35 | other-modules: Codegen 36 | ConvertIR 37 | Emit 38 | ExtraParsers 39 | Grammar 40 | Intermediate 41 | KeywordParse 42 | Paskell 43 | TypeCheck 44 | Utils 45 | hs-source-dirs: src 46 | default-language: Haskell2010 47 | 48 | test-suite spec 49 | type: exitcode-stdio-1.0 50 | main-is: Test.hs 51 | other-modules: Codegen 52 | ConvertIR 53 | Emit 54 | ExtraParsers 55 | Grammar 56 | Intermediate 57 | KeywordParse 58 | Paskell 59 | TypeCheck 60 | Utils 61 | hs-source-dirs: test src 62 | build-depends: base >=4.9 && <= 4.10.1.0, 63 | parsec, 64 | mtl, 65 | text, 66 | HUnit, 67 | haskeline, 68 | bytestring, 69 | containers, 70 | llvm-hs == 5.1.3, 71 | llvm-hs-pure == 5.1.2, 72 | process 73 | default-language: Haskell2010 74 | -------------------------------------------------------------------------------- /src/Grammar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Grammar where 4 | 5 | import Data.List (find) 6 | 7 | data Reserved = KWand | KWdownto | KWif | KWor | KWthen 8 | | KWarray | KWelse | KWin | KWpacked | KWto | KWbegin 9 | | KWend | KWlabel | KWprocedure | KWtype | KWcase 10 | | KWfile | KWmod | KWprogram | KWuntil | KWconst 11 | | KWfor | KWnil | KWrecord | KWvar | KWdiv | KWfunction 12 | | KWnot | KWrepeat | KWwhile | KWdo | KWgoto | KWof 13 | | KWset | KWwith | KWboolean | KWreal | KWinteger 14 | | KWstring | KWchar deriving (Show, Eq) 15 | 16 | data OP = OPplus | OPminus | OPstar | OPdiv | OPidiv | OPmod 17 | | OPand | OPeq | OPneq | OPless | OPgreater | OPle | OPge 18 | | OPin | OPor deriving (Eq) 19 | 20 | data Type = TYident Ident | TYbool 21 | | TYint | TYreal | TYchar |TYstr 22 | | TYptr Type | Void 23 | deriving (Show, Eq, Ord) 24 | type Ident = String 25 | type IdentList = [Ident] 26 | 27 | data Program = Program Ident Block deriving (Show, Eq) 28 | data Block = Block [Decl] Statement deriving (Show, Eq) 29 | 30 | type VarDecl = (Ident, Type) -- var a,b:char; 31 | type TypeDecl = (Ident, Type) -- var a,b:char; 32 | type CallByRef = Bool 33 | data Decl = DeclVar [VarDecl] 34 | | DeclType [TypeDecl] 35 | | DeclConst [ConstDecl] 36 | | DeclProc Ident [(Ident,Type,CallByRef)] Block 37 | | DeclFunc Ident [(Ident,Type,CallByRef)] Type Block 38 | deriving (Show, Eq) 39 | data ConstDecl = ConstDecl deriving (Show, Eq) -- todo 40 | 41 | data Statement = StatementSeq [Statement] 42 | | Assignment Designator Expr 43 | | ProcCall Ident ExprList 44 | | StatementIf Expr Statement (Maybe Statement) 45 | | StatementCase 46 | | StatementWhile Expr Statement 47 | | StatementRepeat Statement Expr 48 | | StatementFor Ident Expr ToDownTo Expr Statement 49 | | StatementNew Ident 50 | | StatementDispose Ident 51 | | StatementEmpty 52 | | StatmentRead DesigList 53 | | StatementReadLn DesigList 54 | | StatementWrite ExprList 55 | | StatementWriteLn ExprList 56 | deriving (Show, Eq) 57 | type ToDownTo = Bool 58 | 59 | data Designator = Designator Ident [DesigProp] deriving (Show, Eq) 60 | data DesigList = DesigList [Designator] deriving (Show, Eq) 61 | data DesigProp = DesigPropIdent Ident 62 | | DesigPropExprList ExprList 63 | | DesigPropPtr 64 | deriving (Show, Eq) 65 | 66 | type ExprList = [Expr] 67 | data Expr = Relation Expr OP Expr 68 | | Unary OP Expr 69 | | Mult Expr OP Expr 70 | | Add Expr OP Expr 71 | | FactorInt Int 72 | | FactorReal Double 73 | | FactorStr String 74 | | FactorChar Char 75 | | FactorTrue 76 | | FactorFalse 77 | | FactorNil 78 | | FactorDesig Designator 79 | | FactorNot Expr 80 | | FuncCall Ident ExprList 81 | deriving (Show, Eq) 82 | 83 | unaryops = [("+", OPplus), ("-", OPminus)] 84 | addops = [("+", OPplus), ("-", OPminus), ("or", OPor)] 85 | multops = [("*", OPstar), ("/", OPdiv), ("div", OPidiv), 86 | ("mod", OPmod), ("and", OPand)] 87 | relationops = [("=", OPeq), ("<>", OPneq), ("<=", OPle), 88 | (">=", OPge), ("<", OPless), (">", OPgreater), 89 | ("in", OPin)] 90 | operators = addops ++ multops ++ relationops 91 | 92 | instance Show OP where 93 | show op = case find (\(_,b) -> b==op) operators of 94 | Nothing -> "OP??" 95 | Just (a,_) -> a -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Paskell 2 | [![Build Status](https://travis-ci.org/sam46/Paskell.svg?branch=master)](https://travis-ci.org/sam46/Paskell) 3 | A (reduced) Pascal compiler in Haskell that compiles to LLVM 4 | 5 | - [Paskell](#paskell) 6 | + [Features](#features) 7 | + [Progress](#progress) 8 | + [Usage](#usage) 9 | + [Demo](#demo) 10 | + [Building](#building) 11 | * [With docker](#with-docker) 12 | * [Without docker](#without-docker) 13 | + [Tests](#tests) 14 | + [Implementation](#implementation) 15 | + [TODO](#todo) 16 | + [Contributions](#contributions) 17 | + [References](#references) 18 | 19 | ### Features 20 | - Declarations: var, type (aliases) 21 | - Types: integer, boolean, string, char, real 22 | - Control Flow: if, while, for 23 | - Functions/Procedures 24 | - Pass by reference 25 | - Basic Typecasting 26 | - Nested Functions/Procedures (Not finished yet) 27 | - I/O: Write/Writeln 28 | 29 | ### Progress 30 | - [x] Lexing/Parsing 31 | - [x] Semantic Analysis/Type-Checking 32 | - [x] Type-Annotated IR AST 33 | - [x] IR pretty-printer 34 | - [x] LLVM Code generation 35 | 36 | ### Usage 37 | Once the executable is built, it can be used to compile Pascal source files to llvm-ir, or internal IR used by the compiler: 38 | 39 | `paskell -c src` compile to llvm-ir 40 | `paskell -c src dest` compile to llvm-ir and save in dest 41 | `paskell -ir src` produce internal IR 42 | `paskell -x src` execute pascal source. Equivalent to 43 | `paskell -c src | lli` 44 | `paskell -h` (for help) 45 | 46 | ### Demo: 47 | The compiler is complemented with the `llvm` utilities 48 | ``` 49 | $ paskell -c fib.pas fib.ll 50 | ``` 51 | 52 | Since the output is llvm-ir, we can leverage the many tools LLVM provide to: 53 | - execute it using the llvm interpreter 54 | `$ lli fib.ll` 55 | - convert it to bitcode llvm assembly (.bc) 56 | `$ llvm-as fib.ll -o fib.bc` 57 | - optimize the code using various settings, for example 58 | `$ opt -mem2reg fib.bc` 59 | - translate it to a native assembly executable of a specific architecture (x86, ARM, etc) 60 | `$ llc -march=x86-64 fib.bc -o fib.s` 61 | - link many modules into one program 62 | 63 | ### Building 64 | 65 | ##### With docker 66 | ``` 67 | $ make bash 68 | ``` 69 | to build the compiler and launch a shell session where the compiler and llvm utitlies are in `$PATH` and ready out-of-the-box. 70 | 71 | ###### Alternatively 72 | ``` 73 | $ make build 74 | ``` 75 | will build the same image tagged `paskell` 76 | which can be used with `docker run` and volumes. 77 | For example: 78 | ``` 79 | $ docker run -v /path/to/original_file.pas:/path/to/file.pas paskell paskell -c /path/to/file.pas 80 | ``` 81 | 82 | ##### Without docker 83 | You need to have llvm installed 84 | ``` 85 | $ sudo apt-get install llvm-5.0 86 | ``` 87 | `lli` should be in `$PATH` to be able to execute Pascal programs 88 | 89 | Then, you can use Cabal or Stack. 90 | To build using Cabal: 91 | 92 | ``` 93 | $ cd Paskell/ 94 | $ cabal install -j 95 | ``` 96 | this will install all dependencies and produce an executable in 97 | `dist/build/Paskell/` 98 | 99 | You can also build using Stack. 100 | 101 | ### Tests 102 | ``` 103 | $ make test 104 | ``` 105 | to run the test suite using docker. 106 | 107 | ### Implementation 108 | This is a 4-pass compiler: 109 | 110 | **pass 1**: lex/parsing 111 | **pass 2**: type checking 112 | **pass 3**: constructing IR: type-annotation, type resolution, (future: identifier-renaming, nested-function extraction) 113 | **pass 4**: code generation 114 | 115 | ### TODO 116 | - finish nested functions/procedures: 117 | this only requires pulling nested functions to global scope 118 | and renaming them during the type-annotation pass 119 | - constants: trivial to implement 120 | - Read/Readln IO statements 121 | - records 122 | - arrays 123 | - case statements 124 | - forward declaration 125 | 126 | ### Contributions 127 | Bug reports, added features, etc are welcome 128 | 129 | ### References 130 | - [Language grammar](http://courses.washington.edu/css448/zander/Project/grammar.pdf) 131 | - Stephen Diehl's Haskell [llvm-tutorial](http://www.stephendiehl.com/llvm/) 132 | -------------------------------------------------------------------------------- /src/KeywordParse.hs: -------------------------------------------------------------------------------- 1 | module KeywordParse where 2 | 3 | import Text.Parsec 4 | import Text.Parsec.String 5 | import Text.Parsec.Combinator 6 | import Data.Char 7 | import Data.List (find) 8 | import Grammar 9 | import ExtraParsers 10 | import Utils (p') 11 | 12 | -- make sure keywords are lower-cased 13 | keywords = ["and","downto","if","or", 14 | "then","array","else","in","packed", 15 | "to","begin","end","label","procedure", 16 | "type","case","file","mod","program","until", 17 | "const","for","nil","record","var", "writeln", 18 | "div","function","not","repeat","while", 19 | "do","goto","of","set","with", "write", 20 | "boolean", "integer", "char", "string", "real"] 21 | special = [":=","+","-","*","/","=", 22 | "<",">","<>","<=",">=","(",")","[", 23 | "]",",",".",";",":","..","^"] 24 | 25 | 26 | ---------------------------------------------------- 27 | -- specialEscape = [chr 0x08, chr 0x0C, '\n', '\r', 28 | -- '\t', '\v', '\'', '"', '\\'] 29 | -- isEscapeChar c = c `elem` specialEscape 30 | 31 | data SpecialChar = BackSpace | FormFeed 32 | | NewLine | CarriageReturn | Tab | VerticalTab 33 | | SingleQuote | DoubleQuote | Backslash 34 | deriving (Eq, Ord, Show) 35 | 36 | fromSpecialChar :: SpecialChar -> Char 37 | fromSpecialChar BackSpace = chr 0x08 38 | fromSpecialChar FormFeed = chr 0x0C 39 | fromSpecialChar NewLine = '\n' 40 | fromSpecialChar CarriageReturn = '\r' 41 | fromSpecialChar Tab = '\t' 42 | fromSpecialChar VerticalTab = '\v' 43 | fromSpecialChar SingleQuote = '\'' 44 | fromSpecialChar DoubleQuote = '"' 45 | fromSpecialChar Backslash = '\\' 46 | 47 | toSpecialChar :: Char -> Maybe SpecialChar 48 | toSpecialChar c = snd <$> find ((==) c . fst) table 49 | where table = [('b', BackSpace), ('"' , DoubleQuote), 50 | ('f', FormFeed), ('n', NewLine), ('t', Tab), 51 | ('r', CarriageReturn), ('\\', Backslash), 52 | ('v', VerticalTab), ('\'', SingleQuote)] 53 | 54 | ---------------------------------------------------- 55 | 56 | parseReserved :: String -> Reserved -> Parser Reserved 57 | parseReserved kw ctor = exactTok kw >> return ctor 58 | parseKWand = parseReserved "and" KWand 59 | parseKWdownto = parseReserved "downto" KWdownto 60 | parseKWif = parseReserved "if" KWif 61 | parseKWor = parseReserved "or" KWor 62 | parseKWthen = parseReserved "then" KWthen 63 | parseKWarray = parseReserved "array" KWarray 64 | parseKWelse = parseReserved "else" KWelse 65 | parseKWin = parseReserved "in" KWin 66 | parseKWpacked = parseReserved "packed" KWpacked 67 | parseKWto = parseReserved "to" KWto 68 | parseKWbegin = parseReserved "begin" KWbegin 69 | parseKWend = parseReserved "end" KWend 70 | parseKWlabel = parseReserved "label" KWlabel 71 | parseKWprocedure = parseReserved "procedure" KWprocedure 72 | parseKWtype = parseReserved "type" KWtype 73 | parseKWcase = parseReserved "case" KWcase 74 | parseKWfile = parseReserved "file" KWfile 75 | parseKWmod = parseReserved "mod" KWmod 76 | parseKWprogram = parseReserved "program" KWprogram 77 | parseKWuntil = parseReserved "until" KWuntil 78 | parseKWconst = parseReserved "const" KWconst 79 | parseKWfor = parseReserved "for" KWfor 80 | parseKWnil = parseReserved "nil" KWnil 81 | parseKWrecord = parseReserved "record" KWrecord 82 | parseKWvar = parseReserved "var" KWvar 83 | parseKWdiv = parseReserved "div" KWdiv 84 | parseKWfunction = parseReserved "function" KWfunction 85 | parseKWnot = parseReserved "not" KWnot 86 | parseKWrepeat = parseReserved "repeat" KWrepeat 87 | parseKWwhile = parseReserved "while" KWwhile 88 | parseKWdo = parseReserved "do" KWdo 89 | parseKWgoto = parseReserved "goto" KWgoto 90 | parseKWof = parseReserved "of" KWof 91 | parseKWset = parseReserved "set" KWset 92 | parseKWwith = parseReserved "with" KWwith 93 | parseKWboolean = parseReserved "boolean" KWboolean 94 | parseKWstring = parseReserved "string" KWstring 95 | parseKWchar = parseReserved "char" KWchar 96 | parseKWinteger = parseReserved "integer" KWinteger 97 | parseKWreal = parseReserved "real" KWreal 98 | parseKWwrite = parseReserved "write" KWreal 99 | parseKWwriteln = parseReserved "writeln" KWreal 100 | -------------------------------------------------------------------------------- /src/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main where 4 | 5 | import qualified Paskell as P 6 | import qualified ConvertIR as C 7 | import qualified Emit as E 8 | import qualified TypeCheck as T 9 | 10 | import Control.Monad.Trans 11 | -- import System.Console.Haskeline 12 | import Data.List (isPrefixOf) 13 | import qualified Data.Text as Text 14 | 15 | import System.IO 16 | import System.Environment 17 | import System.Process 18 | 19 | -- msg = "Paskell version unknown.\n" 20 | -- ++ "Type ':l path' to compile a pascal source file.\n" 21 | helpmsg = "Usage:\n " 22 | ++ "paskell -c src compile to llvm-ir\n " 23 | ++ "paskell -c src dest compile to llvm-ir and save in dest\n " 24 | ++ "paskell -ir src produce IR\n " 25 | ++ "paskell -x src execute pascal source. Equivalent to paskell -c src | lli\n " 26 | -- ++ "paskell -repl\n\t" 27 | ++ "paskell -h (for help)" 28 | 29 | strip s = Text.unpack $ Text.strip $ Text.pack s 30 | 31 | -- process :: String -> IO () 32 | -- process line = do 33 | -- let res = P.parseToplevel line 34 | -- case res of Left err -> print err 35 | -- Right tree -> print tree 36 | 37 | -- processParse :: String -> IO () 38 | -- processParse path = do 39 | -- res <- P.parsePascalFile path 40 | -- case res of Left err -> print err 41 | -- Right tree -> print tree 42 | 43 | -- repl :: InputT IO () 44 | -- repl = do 45 | -- minput <- getInputLine "Paskell> " 46 | -- case minput of 47 | -- Nothing -> outputStrLn "Leaving Paskell." 48 | -- Just input | isPrefixOf ":l " input -> 49 | -- (liftIO $ processCompile (strip input) "") >> repl 50 | -- | otherwise -> (liftIO $ process (strip input)) >> repl 51 | 52 | processIR :: String -> IO () 53 | processIR path = do 54 | e <- P.parsePascalFile path 55 | case e of Left errP -> print errP 56 | Right ast -> case T.typechkProgram ast 57 | of Left errTC -> print errTC 58 | Right _ -> print $ C.convProgram ast 59 | 60 | processCompile :: String -> String -> IO () 61 | processCompile path dest = do 62 | e <- P.parsePascalFile path 63 | case e of Left errP -> print errP 64 | Right ast -> case T.typechkProgram ast of 65 | Left errTC -> print errTC 66 | Right _ -> E.printllvm ast >>= 67 | if dest /= "" 68 | then writeFile dest 69 | else putStrLn 70 | 71 | processExec :: String -> IO () 72 | processExec path = do 73 | e <- P.parsePascalFile path 74 | case e of Left errP -> print errP 75 | Right ast -> case T.typechkProgram ast of 76 | Right _ -> E.printllvm ast >>= execLLVM 77 | Left errTC -> print errTC 78 | 79 | execLLVM :: String -> IO () 80 | execLLVM llvm = do 81 | (excode,res,stderr) <- readCreateProcessWithExitCode (shell "lli") llvm 82 | putStr res 83 | if length stderr > 0 84 | then putStr $ stderr ++ "\n" ++ (show excode) 85 | else putStr "" 86 | 87 | main :: IO () 88 | main = do 89 | args <- getArgs 90 | case args of 91 | -- [cmd] | (strip cmd) == "-repl" -> 92 | -- putStrLn msg >> runInputT defaultSettings repl 93 | -- | otherwise -> putStrLn helpmsg 94 | [cmd, path] | (strip cmd) == "-c" -> 95 | liftIO (processCompile (strip path) "") 96 | | (strip cmd) == "-x" -> 97 | liftIO (processExec (strip path)) 98 | | (strip cmd) == "-ir" -> 99 | liftIO (processIR (strip path)) 100 | | otherwise -> putStrLn helpmsg 101 | [cmd, path, dest] | (strip cmd) == "-c" -> 102 | liftIO $ processCompile (strip path) (strip dest) 103 | | otherwise -> putStrLn helpmsg 104 | _ -> putStrLn helpmsg 105 | -------------------------------------------------------------------------------- /src/Intermediate.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Intermediate where 4 | 5 | import Grammar (OP, Type, Ident, IdentList, 6 | VarDecl, TypeDecl, CallByRef, ToDownTo) 7 | import Data.List (intercalate) 8 | import Prelude hiding (showList) 9 | 10 | data Program = Program Ident Block Type deriving (Eq) 11 | data Block = Block [Decl] Statement Type deriving (Eq) 12 | 13 | data Decl = DeclVar [VarDecl] Type 14 | | DeclType [TypeDecl] Type 15 | | DeclConst [ConstDecl] Type 16 | | DeclFunc Ident [(Ident,Type,CallByRef)] Type Block Type 17 | deriving (Eq) 18 | data ConstDecl = ConstDecl Type deriving (Show, Eq) -- todo 19 | 20 | data Statement = StatementSeq [Statement] Type 21 | | Assignment Designator Expr Type 22 | | ProcCall Ident ExprList Type 23 | | StatementIf Expr Statement (Maybe Statement) Type 24 | | StatementCase Type 25 | | StatementWhile Expr Statement Type 26 | | StatementRepeat Statement Expr Type 27 | | StatementFor Ident Expr ToDownTo Expr Statement Type 28 | | StatementNew Ident Type 29 | | StatementDispose Ident Type 30 | | StatementEmpty 31 | | StatmentRead DesigList Type 32 | | StatementReadLn DesigList Type 33 | | StatementWrite ExprList Type 34 | deriving (Eq) 35 | 36 | data Designator = Designator Ident [DesigProp] Type deriving (Eq) 37 | data DesigList = DesigList [Designator] Type deriving (Show, Eq) 38 | data DesigProp = DesigPropIdent Ident Type 39 | | DesigPropExprList ExprList Type 40 | | DesigPropPtr Type 41 | deriving (Show, Eq) 42 | 43 | type ExprList = [Expr] 44 | data Expr = 45 | Relation {getOperand :: Expr, getOP :: OP, getOperand' :: Expr, getType :: Type} 46 | | Unary {getOP :: OP, getOperand :: Expr, getType :: Type} 47 | | Mult {getOperand :: Expr, getOP :: OP, getOperand' :: Expr, getType :: Type} 48 | | Add {getOperand :: Expr, getOP :: OP, getOperand' :: Expr, getType :: Type} 49 | | FactorInt {getOperandi :: Int, getType :: Type} 50 | | FactorReal {getOperandr :: Double, getType :: Type} 51 | | FactorStr {getOperands :: String, getType :: Type} 52 | | FactorTrue {getType :: Type} 53 | | FactorFalse {getType :: Type} 54 | | FactorNil {getType :: Type} 55 | | FactorDesig {getOperandd :: Designator, getType :: Type} 56 | | FactorNot {getOperand :: Expr, getType :: Type} 57 | | FuncCall {getFunc :: Ident, getExprs :: ExprList, getType :: Type} 58 | deriving (Eq) 59 | 60 | instance Ord Expr where 61 | x1 `compare` x2 = t1 `compare` t2 62 | where (t1, t2) = (getType x1, getType x2) 63 | 64 | --------- Pretty-Printer --------- 65 | 66 | showList xs = intercalate " " (map show xs) 67 | showListSep xs = intercalate ", " (map show xs) 68 | showListNoSpc xs = intercalate "" (map show xs) 69 | tab n = intercalate "" (replicate n " ") 70 | 71 | instance Show Program where 72 | show (Program x b _) = "Program "++ x ++ " " ++ (pshowB 0 b) 73 | 74 | instance Show Block where 75 | show x = pshowB 0 x 76 | 77 | pshowB n (Block ds s _) = "{\n"++ (if length ds == 0 then "" 78 | else (intercalate "" $ map (pshowD (n+1)) ds)) ++ "\n" ++ (pshowSt (n+1) s) ++ "\n" 79 | ++ (tab n) ++ "}" 80 | 81 | instance Show Decl where 82 | show x = pshowD 0 x 83 | 84 | pshowD n (DeclVar xs _) = (tab n) ++ "Var " ++ (if length xs == 0 then "" 85 | else showList xs) ++ ";\n" 86 | pshowD n (DeclType xs _) = (tab n) ++ "Type " ++ (if length xs == 0 then "" 87 | else showList xs) ++ ";\n" 88 | pshowD n (DeclConst xs _) = (tab n) ++ "Const " ++ (if length xs == 0 then "" 89 | else showList xs) ++ ";\n" 90 | pshowD n (DeclFunc x xs t b _) = (tab n) ++ "Func " ++ x ++ ":" ++ (show t) ++ " " ++ 91 | "(" ++ showListSep (map (\(a',b',cbr) -> (a', (if cbr then "&" else "") ++ show b')) xs) 92 | ++ ") " ++ (pshowB n b) ++ "\n" 93 | 94 | instance Show Designator where 95 | show x = pshowDes 0 x 96 | 97 | pshowDes n (Designator x _ t) = (tab n) ++ x ++ ":" ++ (show t) 98 | 99 | instance Show Statement where 100 | show x = pshowSt 0 x 101 | pshowSt n (Assignment x ex t) = (pshowDes n x) ++ " := " ++ (pshowEx 0 ex) ++ ";\n" 102 | pshowSt n (StatementEmpty) = (tab n) ++ ";\n" 103 | pshowSt n (StatementSeq xs _) = if length xs == 0 then "" 104 | else (intercalate "" $ map (pshowSt n) xs) 105 | pshowSt n (StatementIf ex s ms _) = (tab n) ++ "if " ++ (pshowEx 0 ex) ++"\n"++ 106 | (tab n) ++ "then\n" ++ (pshowSt (n+1) s) 107 | ++ (case ms of Nothing -> "" 108 | Just s2 -> (tab n) ++ "else\n" ++ (pshowSt (n+1) s2)) 109 | pshowSt n (ProcCall f exs _) = (tab n) ++ f 110 | ++ "(" ++ (if length exs == 0 then "" else showListSep exs) ++"):" 111 | pshowSt n (StatementWrite exs _) = (tab n) ++ 112 | "Write(" ++ (if length exs == 0 then "" else showListSep exs) ++"):" 113 | pshowSt n _ = (tab n) ++ "??" ++ ";\n" 114 | 115 | 116 | 117 | instance Show Expr where 118 | show x = pshowEx 0 x 119 | 120 | pshowEx n (Relation ex1 op ex2 t) = "("++(pshowEx 0 ex1) ++ (show op) ++ (pshowEx 0 ex2) ++ "):" ++ (show t) 121 | pshowEx n (Unary op ex t) = "(" ++(show op) ++ (pshowEx 0 ex) ++ "):" ++ (show t) 122 | pshowEx n (Mult ex1 op ex2 t) = "("++(pshowEx 0 ex1) ++ (show op) ++ (pshowEx 0 ex2) ++ "):" ++ (show t) 123 | pshowEx n (Add ex1 op ex2 t) = "("++(pshowEx 0 ex1) ++ (show op) ++ (pshowEx 0 ex2) ++ "):" ++ (show t) 124 | pshowEx n (FactorInt x t) = (show x) ++ ":" ++ (show t) 125 | pshowEx n (FactorReal x t) = (show x) ++ ":" ++ (show t) 126 | pshowEx n (FactorStr x t) = (show x) ++ ":" ++ (show t) 127 | pshowEx n (FactorTrue t) = "True" ++ ":" ++ (show t) 128 | pshowEx n (FactorFalse t) = "False" ++ ":" ++ (show t) 129 | pshowEx n (FactorNil t) = "Nil" ++ ":" ++ (show t) 130 | pshowEx n (FactorDesig x t) = (pshowDes 0 x) ++ ":" ++ (show t) 131 | pshowEx n (FactorNot ex t) = undefined 132 | pshowEx n (FuncCall x exs t) = x ++ "(" ++ 133 | (if length exs == 0 then "" else showListSep exs) ++"):"++(show t) 134 | -------------------------------------------------------------------------------- /src/Paskell.hs: -------------------------------------------------------------------------------- 1 | module Paskell where 2 | 3 | import Text.Parsec 4 | import Text.Parsec.String 5 | import Text.Parsec.Combinator 6 | import Data.Char 7 | import Grammar 8 | import ExtraParsers 9 | import KeywordParse 10 | import Utils (p') 11 | 12 | 13 | parseIdent :: Parser Ident 14 | parseIdent = tok . try $ do 15 | x <- letter 16 | xs <- many alphaNum 17 | let ident = x:xs 18 | if (toLower <$> ident) `elem` keywords 19 | then fail ("Expecting identifier but found keyword " ++ ident) 20 | else return ident 21 | 22 | parseType :: Parser Type 23 | parseType = tok $ 24 | (TYident <$> parseIdent) <|> 25 | (TYbool <$ stringIgnoreCase "boolean") <|> 26 | (TYint <$ stringIgnoreCase "integer") <|> 27 | (TYreal <$ stringIgnoreCase "real") <|> 28 | (TYchar <$ stringIgnoreCase "char") <|> 29 | (TYstr <$ stringIgnoreCase "string") 30 | 31 | parseIdentList :: Parser IdentList 32 | parseIdentList = sepBy1 parseIdent commaTok 33 | 34 | parseDeclVar :: Parser Decl -- var a,b : char; c,d : integer; 35 | parseDeclVar = DeclVar <$>( (parseKWvar "expecting keyword 'var'") >> 36 | ((concat <$> (many1 $ try -- todo try separating many1 into initial parse and then many for better error messages 37 | (do {xs <- parseIdentList; charTok ':'; 38 | t <- parseType; semicolTok; 39 | return $ zip xs (repeat t)}) 40 | )) "Missing or incorrect variable declaration")) 41 | 42 | parseDeclType :: Parser Decl 43 | parseDeclType = DeclType <$> ((parseKWtype "expecting keyword 'type'") >> 44 | ((concat <$> (many1 $ try -- todo try separating many1 into initial parse and then many for better error messages 45 | (do {xs <- parseIdentList; charTok '='; 46 | t <- parseType; semicolTok; 47 | return $ zip xs (repeat t)}) 48 | )) "Missing or incorrect type declaration")) 49 | 50 | parseConstDecl :: Parser [ConstDecl] 51 | parseConstDecl = undefined -- todo 52 | 53 | parseProgram :: Parser Program 54 | parseProgram = between parseKWprogram (charTok '.') 55 | (do prog <- parseIdent 56 | semicolTok 57 | blok <- parseBlock 58 | return $ Program prog blok) 59 | 60 | parseBlock :: Parser Block 61 | parseBlock = Block <$> many parseDecl <*> parseStmntSeq 62 | 63 | parseDecl :: Parser Decl 64 | parseDecl = 65 | (parseDeclType) <|> 66 | (parseDeclVar) <|> 67 | (parseDeclFunc) <|> 68 | (parseDeclProc) -- <|> 69 | -- (DeclConst <$> parseDeclConst) 70 | 71 | makeOPparser :: [(String, OP)] -> Parser OP 72 | makeOPparser xs = let f (a, b) = try (stringTok a >> return b) 73 | in foldr (<|>) (fail "Expecting operator") (map f xs) 74 | parseOP = makeOPparser operators -- any OP (relation, additive, mult, unary) 75 | parseOPunary = {-OPunary <$>-} makeOPparser unaryops 76 | parseOPadd = {-OPadd <$>-} makeOPparser addops 77 | parseOPmult = {-OPmult <$>-} makeOPparser multops 78 | parseOPrelation = {-OPrelation <$>-} makeOPparser relationops 79 | 80 | parseDesignator :: Parser Designator 81 | parseDesignator = Designator <$> parseIdent <*> try (many parseDesigProp) 82 | 83 | parseDesigProp :: Parser DesigProp 84 | parseDesigProp = 85 | (charTok '.' >> DesigPropIdent <$> parseIdent) <|> 86 | (charTok '^' >> return DesigPropPtr) <|> 87 | (DesigPropExprList <$> betweenCharTok 88 | '[' ']' parseExprList1) 89 | 90 | parseDesigList :: Parser DesigList 91 | parseDesigList = DesigList <$> many1 parseDesignator 92 | 93 | parseExprList1 :: Parser ExprList -- non-empty 94 | parseExprList1 = sepBy1 parseExpr commaTok 95 | 96 | parseExprList :: Parser ExprList -- non-empty 97 | parseExprList = sepBy parseExpr commaTok 98 | 99 | parseExpr :: Parser Expr 100 | parseExpr = (try $ Relation <$> 101 | parseSimpleExpr <*> parseOPrelation <*> parseSimpleExpr) 102 | <|> parseSimpleExpr 103 | 104 | parseSimpleExpr :: Parser Expr 105 | parseSimpleExpr = (try simpleAdd) 106 | <|> (try $ Unary <$> parseOPunary <*> simpleAdd) 107 | <|> (try $ Unary <$> parseOPunary <*> parseSimpleExpr) 108 | <|> parseTerm 109 | where simpleAdd = Add <$> parseTerm <*> parseOPadd <*> parseSimpleExpr 110 | 111 | parseTerm :: Parser Expr 112 | parseTerm = (try $ Mult <$> 113 | parseFactor <*> parseOPmult <*> parseTerm) 114 | <|> parseFactor 115 | 116 | parseFactor :: Parser Expr 117 | parseFactor = 118 | (parseKWnil >> return FactorNil) 119 | <|> (parseKWnot >> FactorNot <$> parseFactor) 120 | <|> (exactTok "true" >> return FactorTrue) -- todo double check exactTok is the right choice 121 | <|> (exactTok "false" >> return FactorFalse) 122 | <|> (parseNumber) 123 | <|> (try $ FactorChar <$> parseChar) 124 | <|> (FactorStr <$> parseString) 125 | <|> (betweenCharTok '(' ')' parseExpr) 126 | <|> (try parseFuncCall) 127 | <|> (FactorDesig <$> parseDesignator) 128 | 129 | parseStmntSeq :: Parser Statement -- non-empty 130 | parseStmntSeq = parseKWbegin 131 | >> (sepBy1 parseStatement semicolTok) 132 | >>= \stmts -> parseKWend 133 | >>= \_ -> return $ StatementSeq stmts 134 | 135 | parseStatement :: Parser Statement 136 | parseStatement = choice [parseStmntSeq, 137 | (try parseAssignment), 138 | (try parseProcCall), 139 | parseIf, parseFor, 140 | 141 | parseWhile, parseStmntWriteLn, 142 | parseStmntWrite, 143 | pure StatementEmpty] 144 | 145 | parseIf :: Parser Statement 146 | parseIf = do 147 | expr <- between parseKWif parseKWthen parseExpr 148 | stmt <- parseStatement 149 | mstmt <- optionMaybe $ parseKWelse >> parseStatement 150 | return $ StatementIf expr stmt mstmt 151 | 152 | parseCase :: Parser Statement 153 | parseCase = undefined 154 | 155 | parseRepeat :: Parser Statement 156 | parseRepeat = undefined 157 | 158 | parseWhile :: Parser Statement 159 | parseWhile = do 160 | parseKWwhile 161 | ex <- parseExpr 162 | parseKWdo 163 | s <- parseStatement 164 | return $ StatementWhile ex s 165 | 166 | parseFor :: Parser Statement 167 | parseFor = do 168 | parseKWfor 169 | x <- parseIdent 170 | stringTok ":=" 171 | expr <- parseExpr 172 | direc <- (parseKWto >> pure True) <|> (parseKWdownto >> pure False) 173 | expr2 <- parseExpr 174 | parseKWdo 175 | stmt <- parseStatement 176 | return $ StatementFor x expr direc expr2 stmt 177 | 178 | parseStmtNew :: Parser Statement 179 | parseStmtNew = undefined 180 | 181 | parseStmtDispose :: Parser Statement 182 | parseStmtDispose = undefined 183 | 184 | parseAssignment :: Parser Statement 185 | parseAssignment = parseDesignator >>= \x -> stringTok ":=" 186 | >>= \_ -> parseExpr 187 | >>= \expr -> return $ Assignment x expr 188 | 189 | parseProcCall :: Parser Statement 190 | parseProcCall = ProcCall <$> parseIdent 191 | <*> ((betweenCharTok '(' ')' parseExprList) 192 | <|> pure []) 193 | 194 | parseStmntMem :: Parser Statement 195 | parseStmntMem = undefined 196 | 197 | parseStmntWrite :: Parser Statement 198 | parseStmntWrite = StatementWrite <$> (parseKWwrite >> 199 | ((betweenCharTok '(' ')' parseExprList))) 200 | 201 | parseStmntWriteLn :: Parser Statement 202 | parseStmntWriteLn = StatementWriteLn <$> (parseKWwriteln >> 203 | ((betweenCharTok '(' ')' parseExprList))) 204 | 205 | parseFuncCall :: Parser Expr 206 | parseFuncCall = FuncCall <$> parseIdent 207 | <*> ((betweenCharTok '(' ')' parseExprList)) 208 | 209 | parseNumber :: Parser Expr 210 | parseNumber = tok $ do 211 | pre <- many1 digit 212 | post <- ((try $ char '.') >> ('.':) <$> many1 digit) <|> pure "" 213 | let xs = pre ++ post 214 | return $ if '.' `elem` xs 215 | then FactorReal $ read xs 216 | else FactorInt $ read xs 217 | 218 | parseString :: Parser String 219 | parseString = between (char '\'') (charTok '\'') $ many $ 220 | (noneOf ['\\', '\'']) <|> 221 | ((char '\\') >> anyChar >>= \c -> case toSpecialChar c 222 | of Just x -> return (fromSpecialChar x) 223 | Nothing -> if c == 'u' then undefined -- todo hex 224 | else unexpected ("char in string" ++ [c])) 225 | 226 | parseChar :: Parser Char 227 | parseChar = between (char '\'') (charTok '\'') $ 228 | (noneOf ['\\', '\'']) <|> 229 | ((char '\\') >> anyChar >>= \c -> case toSpecialChar c 230 | of Just x -> return (fromSpecialChar x) 231 | Nothing -> if c == 'u' then undefined -- todo hex 232 | else unexpected ("char literal" ++ [c])) 233 | 234 | -- parseSubprogDeclList 235 | parseDeclProc :: Parser Decl 236 | parseDeclProc = do 237 | parseKWprocedure 238 | f <- parseIdent 239 | params <- (try parseFormalParams) <|> (pure []) 240 | semicolTok 241 | blk <- parseBlock 242 | semicolTok 243 | return $ DeclProc f params blk 244 | 245 | parseDeclFunc :: Parser Decl 246 | parseDeclFunc = do 247 | parseKWfunction 248 | f <- parseIdent 249 | params <- (try parseFormalParams) <|> (pure []) 250 | charTok ':' 251 | rtype <- parseType 252 | semicolTok 253 | blk <- parseBlock 254 | semicolTok 255 | return $ DeclFunc f params rtype blk 256 | 257 | parseFormalParams :: Parser [(Ident,Type,Bool)] 258 | parseFormalParams = (concatMap id) <$> 259 | (betweenCharTok '(' ')' $ sepBy parseFormalParam semicolTok) 260 | 261 | parseFormalParam :: Parser [(Ident,Type,Bool)] 262 | parseFormalParam = do 263 | mvar <- (/= Nothing) <$> (optionMaybe parseKWvar) 264 | idents <- parseIdentList 265 | charTok ':' 266 | t <- parseType 267 | return $ map (\(x, ty) -> (x, ty, mvar)) (zip idents $ repeat t) 268 | 269 | ------------------------------------------------------ 270 | contents :: Parser a -> Parser a 271 | contents p = whitespace *> p <* eof 272 | 273 | parseToplevel :: String -> Either ParseError Program 274 | parseToplevel = parse (contents parseProgram) "" 275 | 276 | parsePascalFile :: String -> IO (Either ParseError Program) 277 | parsePascalFile = parseFromFile (contents parseProgram) 278 | -------------------------------------------------------------------------------- /src/ConvertIR.hs: -------------------------------------------------------------------------------- 1 | module ConvertIR where 2 | 3 | import Grammar as AST 4 | import qualified Intermediate as IR 5 | import Paskell (parseProgram, parseDecl) 6 | import TypeCheck (typechkProgram, TyErr, typechkDecl) 7 | 8 | import Text.Parsec 9 | import Text.Parsec.String 10 | import Text.Parsec.Combinator 11 | import Utils (p') 12 | import Data.List 13 | 14 | -- Environment is a Func/Proc signatures + stack of Contexts for types and vars 15 | type Env = (Sig, [Context], [TContext]) 16 | -- Function sig is return type + formal args types 17 | type Sig = [(Ident, (Type, [(Type, CallByRef)]))] 18 | type Context = [(Ident, Type)] 19 | type TContext = [(Ident, Type)] 20 | 21 | varInContext :: Context -> Ident -> Bool 22 | varInContext ctx x = case (lookup x ctx) of 23 | Nothing -> False 24 | Just _ -> True 25 | 26 | typeInContext :: TContext -> Ident -> Bool 27 | typeInContext tctx x = case (lookup x tctx) of 28 | Nothing -> False 29 | Just _ -> True 30 | 31 | lookupVar :: Env -> Ident -> Type 32 | lookupVar (_, contexts, _) x = case (find (`varInContext` x) contexts) of 33 | Just ctx -> case lookup x ctx of 34 | Just t -> t 35 | 36 | lookupFun :: Env -> Ident -> (Type, [(Type, CallByRef)]) 37 | lookupFun (sigs, _, _) x = 38 | case lookup x sigs of Just f -> f 39 | 40 | lookupType :: Env -> Type -> Type 41 | lookupType e@(_, _, contexts) (TYident x) = case (find (`typeInContext` x) contexts) of 42 | Nothing -> undefined --Left $ NotInScope x 43 | Just ctx -> case lookup x ctx of 44 | Nothing -> undefined -- Left $ NotInScope x 45 | Just t -> lookupType e t 46 | lookupType _ t = t 47 | 48 | newBlock :: Env -> Env 49 | newBlock (sig, ctx, tctx) = (sig, [] : ctx, [] : tctx) 50 | 51 | emptyEnv :: Env 52 | emptyEnv = ([], [], []) 53 | 54 | getSig :: Decl -> (Ident, (Type, [(Type, CallByRef)])) 55 | getSig (DeclFunc x args t _) = (x, (t, map (\(_,b,c) -> (b,c)) args)) 56 | 57 | -- updateVar :: Env -> Ident -> Type -> Either TyErr Env 58 | 59 | addVar :: Env -> Ident -> Type -> Env 60 | addVar (sig , (c:cs), tctx) x t = (sig, ((x,t):c) : cs, tctx) 61 | 62 | addFunc :: Env -> (Ident, (Type, [(Type, CallByRef)])) -> Env 63 | addFunc (sigs, ctx, tctx) (x, rest) = case lookup x sigs of 64 | Nothing -> ((x, rest) : sigs, ctx, tctx) 65 | 66 | 67 | addType :: Env -> Ident -> Type -> Env 68 | addType (sig, ctx, (c:cs)) x t = (sig, ctx, ((x,t):c) : cs) 69 | 70 | isNum = (`elem` [TYint, TYreal]) 71 | 72 | 73 | convProgram :: Program -> IR.Program 74 | convProgram (Program x b) = 75 | IR.Program x (fst $ convBlock (newBlock emptyEnv) b) Void 76 | 77 | convBlock :: Env -> Block -> (IR.Block, Env) 78 | convBlock env (Block decls s)= let 79 | (decls', env') = convDecls env decls 80 | s' = convStatement env' s in 81 | (IR.Block decls' s' Void, env) 82 | 83 | convDecls :: Env -> [Decl] -> ([IR.Decl], Env) 84 | convDecls env [] = ([], env) 85 | convDecls env (d:ds) = let 86 | (ird, env') = convDecl env d 87 | (irds, env'') = convDecls env' ds in 88 | (ird:irds, env'') 89 | 90 | convDecl :: Env -> Decl -> (IR.Decl, Env) 91 | convDecl env (DeclVar xs) = let 92 | xs' = map (\(x,t) -> (x, lookupType env t)) xs 93 | addVar' (x,t) e = addVar e x t 94 | env' = foldr addVar' env xs' in 95 | (IR.DeclVar xs' Void, env') 96 | convDecl env (DeclFunc x params t b) = let 97 | params' = (x,t,False) : params in -- added hidden variable for return value 98 | convDeclFunc env (DeclFunc x params' t b) 99 | convDecl env (DeclProc x params b) = 100 | convDeclFunc env (DeclFunc x params Void b) 101 | convDecl env (DeclType xs) = let 102 | addType' (a',b') c' = addType c' a' b' 103 | env' = foldr addType' env xs in 104 | (IR.DeclType xs Void, env') 105 | 106 | convDeclFunc :: Env -> Decl -> (IR.Decl, Env) 107 | convDeclFunc env (DeclFunc x params t b) = let 108 | r = lookupType env t 109 | resParams = resolveParamsType env params 110 | addVar' (a',b',_) c' = addVar c' a' b' 111 | env' = addFunc env (getSig $ DeclFunc x resParams r b) 112 | env'' = foldr addVar' (newBlock env') resParams in 113 | (IR.DeclFunc x resParams r (fst $ convBlock env'' b) Void, env') 114 | 115 | resolveParamsType :: Env -> [(Ident,Type,CallByRef)] -> [(Ident,Type,CallByRef)] 116 | resolveParamsType env params = map (\(x,t,b) -> (x,lookupType env t,b)) params 117 | 118 | convStatement :: Env -> Statement -> IR.Statement 119 | convStatement env (Assignment des expr) = 120 | IR.Assignment (convDesignator env des) (convExpr env expr) Void 121 | 122 | convStatement env (StatementIf expr s1 ms2) = 123 | IR.StatementIf (convExpr env expr) (convStatement env s1) ((convStatement env) <$> ms2) Void 124 | 125 | convStatement env (StatementFor i x1 b x2 s) = -- todo: add i to s's env? 126 | IR.StatementFor i (convExpr env x1) b (convExpr env x2) (convStatement env s) Void 127 | 128 | convStatement env (StatementWhile expr s) = 129 | IR.StatementWhile (convExpr env expr) (convStatement env s) Void 130 | 131 | convStatement env StatementEmpty = IR.StatementEmpty 132 | 133 | convStatement env (StatementSeq xs) = IR.StatementSeq (map (convStatement env) xs) Void 134 | 135 | convStatement env (StatementWrite xs) = IR.StatementWrite (map (convExpr env) xs) Void 136 | 137 | convStatement env (StatementWriteLn xs) = convStatement env (StatementWrite $ xs++[FactorStr "\n"]) 138 | 139 | convStatement env (ProcCall f args) = 140 | IR.ProcCall f args''' Void 141 | where (_, sig) = lookupFun env f 142 | args'' = map (convExpr env) args -- convert to type annotaed IR args 143 | args''' = map liftType (zip args'' sig) -- lift PassByRef args types to pointers, and typecast int to real when necessary 144 | liftType (expr, (ty,pbr)) = 145 | let expr' = if ty == TYreal then expr {IR.getType = TYreal} else expr 146 | in if not pbr then expr' 147 | else let IR.FactorDesig x factty = expr' 148 | in IR.FactorDesig x (TYptr factty) 149 | 150 | convDesignator :: Env -> Designator -> IR.Designator 151 | convDesignator env (Designator x _) = IR.Designator x [] (lookupVar env x) 152 | 153 | convExpr :: Env -> Expr -> IR.Expr 154 | convExpr env FactorTrue = IR.FactorTrue TYbool 155 | convExpr env FactorFalse = IR.FactorFalse TYbool 156 | convExpr env (FactorInt x) = IR.FactorInt x TYint 157 | convExpr env (FactorReal x) = IR.FactorReal x TYreal 158 | convExpr env (FactorStr x) = IR.FactorStr x TYstr 159 | convExpr env (FactorChar x) = IR.FactorStr [x] TYstr 160 | convExpr env (FactorNot x) = undefined 161 | 162 | convExpr env (FuncCall f args) = 163 | IR.FuncCall f args''' fty 164 | where (fty, sig) = lookupFun env f 165 | dummyarg = case fty of 166 | TYint -> FactorInt 0 167 | TYstr -> FactorStr "" 168 | TYbool -> FactorFalse 169 | TYreal -> FactorReal 0.0 170 | TYchar -> FactorChar '\00' 171 | args' = dummyarg : args -- add dummy arg 172 | args'' = map (convExpr env) args' -- convert to type annotaed IR args 173 | args''' = map liftType (zip args'' sig) -- lift PassByRef args types to pointers, and typecast int to real when necessary 174 | liftType (expr, (ty,pbr)) = 175 | let expr' = if ty == TYreal then expr {IR.getType = TYreal} else expr 176 | in if not pbr then expr' 177 | else let IR.FactorDesig x factty = expr' 178 | in IR.FactorDesig x (TYptr factty) 179 | 180 | convExpr env (FactorDesig des) = let 181 | (Designator x _) = des 182 | in IR.FactorDesig (convDesignator env des) (lookupVar env x) 183 | 184 | convExpr env (Unary op x) = let 185 | x' = convExpr env x 186 | in IR.Unary op x' (IR.getType x') 187 | 188 | convExpr env (Relation x1 op x2) = let 189 | x1' = convExpr env x1 190 | x2' = convExpr env x2 191 | in IR.Relation x1' op x2' TYbool 192 | 193 | convExpr env (Add x1 op x2) = let 194 | x1' = convExpr env x1 195 | x2' = convExpr env x2 196 | t1 = IR.getType x1' 197 | t2 = IR.getType x2' 198 | t = if op `elem` [OPplus, OPminus] 199 | then if t1 == TYreal then t1 else t2 200 | else TYbool 201 | in IR.Add x1' op x2' t 202 | 203 | convExpr env (Mult x1 op x2) = let 204 | x1' = convExpr env x1 205 | x2' = convExpr env x2 206 | t1 = IR.getType x1' 207 | t2 = IR.getType x2' 208 | t | op == OPstar = 209 | if t1 == TYreal then t1 else t2 210 | | op == OPdiv = TYreal 211 | | op `elem` [OPidiv, OPmod] = TYint 212 | | otherwise = TYbool 213 | in IR.Mult x1' op x2' t 214 | 215 | 216 | chkConvProgram :: Program -> Either TyErr IR.Program 217 | chkConvProgram p = case typechkProgram p of 218 | Left err -> Left err 219 | Right () -> Right $ convProgram p 220 | 221 | chkConvFile :: String -> IO () 222 | chkConvFile path = let p = parseFromFile parseProgram path 223 | in p >>= \pp -> print $ chkConvProgram <$> pp 224 | 225 | chkConvProgram' :: String -> Either String IR.Program 226 | chkConvProgram' s = let p = p' parseProgram s in 227 | case p of Left x -> Left $ show x 228 | Right pp -> case chkConvProgram pp of 229 | Left y -> Left $ show y 230 | Right d -> Right d 231 | 232 | 233 | chkConvDecl :: Decl -> Either TyErr IR.Decl 234 | chkConvDecl d = case typechkDecl ([],[],[]) d of 235 | Left err -> Left err 236 | _ -> Right $ fst $ convDecl ([],[],[]) d 237 | 238 | chkConvDecl' :: String -> Either String IR.Decl 239 | chkConvDecl' s = let p = p' parseDecl s in 240 | case p of Left x -> Left $ show x 241 | Right pp -> case chkConvDecl pp of 242 | Left y -> Left $ show y 243 | Right d -> Right d 244 | -------------------------------------------------------------------------------- /src/TypeCheck.hs: -------------------------------------------------------------------------------- 1 | module TypeCheck where 2 | 3 | import Grammar 4 | import Paskell 5 | import Utils (p') 6 | import Data.List 7 | import Control.Monad (msum) 8 | 9 | -- Environment is a Func/Proc signatures + stack of Contexts 10 | type Env = (Sig, [Context], [TContext]) 11 | -- Function sig is return type + formal args types 12 | type Sig = [(Ident, (Type, [(Type,CallByRef)]))] 13 | type Context = [(Ident, Type)] 14 | type TContext = [(Ident, Type)] 15 | 16 | data TyErr = NotInScope Ident 17 | | TypeMismatch Type Type 18 | | TypeMismatchOrd Type 19 | | TypeMismatchNum Type 20 | | ArgCountMismatch Int 21 | | ArgTypeMismatch Type Type 22 | | CondTypeMismatch Type 23 | | VarRedecl Ident 24 | | FuncRedecl Ident 25 | | VaraibleArgExpected Expr 26 | | TypeRedecl Ident 27 | | UnknownType Ident 28 | deriving (Show, Eq) 29 | 30 | 31 | varInContext :: Context -> Ident -> Bool 32 | varInContext ctx x = case (lookup x ctx) of 33 | Nothing -> False 34 | Just _ -> True 35 | 36 | typeInContext :: TContext -> Ident -> Bool 37 | typeInContext tctx x = case (lookup x tctx) of 38 | Nothing -> False 39 | Just _ -> True 40 | 41 | lookupVar :: Env -> Ident -> Either TyErr Type 42 | lookupVar (_, contexts, _) x = case (find (`varInContext` x) contexts) of 43 | Nothing -> Left $ NotInScope x 44 | Just ctx -> case lookup x ctx of 45 | Nothing -> Left $ NotInScope x 46 | Just t -> Right t 47 | 48 | lookupFun :: Env -> Ident -> Either TyErr (Type, [(Type,CallByRef)]) 49 | lookupFun (sigs, _, _) x = 50 | case lookup x sigs of 51 | Nothing -> Left $ NotInScope x 52 | Just f -> Right f 53 | 54 | lookupType :: Env -> Type -> Either TyErr Type 55 | lookupType e@(_, _, contexts) (TYident x) = case (find (`typeInContext` x) contexts) of 56 | Nothing -> Left $ UnknownType x 57 | Just ctx -> case lookup x ctx of 58 | Nothing -> Left $ UnknownType x 59 | Just t -> lookupType e t 60 | lookupType _ t = Right t 61 | 62 | -- eqType env t1 t2 = (lookupType env t1) == (lookupType env t2) 63 | 64 | newBlock :: Env -> Env 65 | newBlock (sig, ctx, tctx) = (sig, [] : ctx, [] : tctx) 66 | 67 | emptyEnv :: Env 68 | emptyEnv = ([], [], []) 69 | 70 | getSig :: Decl -> (Ident, (Type, [(Type,CallByRef)])) 71 | getSig (DeclFunc x args t _) = (x, (t, map (\(_,b,c) -> (b,c)) args)) 72 | 73 | -- updateVar :: Env -> Ident -> Type -> Either TyErr Env 74 | 75 | addVar :: Env -> Ident -> Type -> Either TyErr Env 76 | addVar (sig, (c:cs), tctx) x t = if varInContext c x 77 | then Left $ VarRedecl x 78 | else Right $ (sig, ((x,t):c) : cs, tctx) 79 | 80 | addFunc :: Env -> (Ident, (Type, [(Type,CallByRef)])) -> Either TyErr Env 81 | addFunc (sigs, ctx, tctx) (x, rest) = 82 | case lookup x sigs of 83 | Just _ -> Left $ FuncRedecl x 84 | Nothing -> Right ((x, rest) : sigs, ctx, tctx) 85 | 86 | addType :: Env -> Ident -> Type -> Either TyErr Env 87 | addType (sig, ctx, (c:cs)) x t = if typeInContext c x 88 | then Left $ TypeRedecl x 89 | else Right $ (sig, ctx, ((x,t):c) : cs) 90 | addType env x t = error $ (show env) ++ (show x) ++ (show t) 91 | 92 | -- isLeft :: (Either a b) -> Bool 93 | -- isLeft (Left _) = True 94 | -- isLeft _ = False 95 | -- pickLeft x y = if isLeft x then x else y 96 | -- fromRight (Right x) = x 97 | 98 | isNum = (`elem` [TYint, TYreal]) 99 | 100 | typechkProgram :: Program -> Either TyErr () 101 | typechkProgram (Program _ b) = typechkBlock (newBlock emptyEnv) b >> return () 102 | 103 | typechkBlock :: Env -> Block -> Either TyErr Env 104 | typechkBlock env (Block ds s) = 105 | typechkDecls env ds >>= \e -> typechkStatement e s 106 | 107 | typechkDecls :: Env -> [Decl] -> Either TyErr Env 108 | typechkDecls env [] = Right env 109 | typechkDecls env (d:ds) = 110 | typechkDecl env d >>= \e -> typechkDecls e ds 111 | 112 | typechkDecl :: Env -> Decl -> Either TyErr Env 113 | typechkDecl env (DeclVar []) = Right env 114 | typechkDecl env (DeclVar (d:ds)) = let (x,t) = d in 115 | lookupType env t >>= \r -> 116 | (addVar env x r) >>= \e -> typechkDecl e (DeclVar ds) 117 | typechkDecl env (DeclFunc x params t b) = lookupType env t >>= \r -> 118 | typechkDeclFunc env (DeclFunc x params' r b) 119 | where params' = (x,t,False) : params -- added hidden variable for return value 120 | typechkDecl env (DeclProc x params b) = typechkDeclFunc env (DeclFunc x params Void b) 121 | typechkDecl env (DeclType []) = Right env 122 | typechkDecl env (DeclType (t:ts)) = let (x,ty) = t in 123 | (addType env x ty) >>= \e -> typechkDecl e (DeclType ts) 124 | 125 | resolveParamsType :: Env -> [(Ident,Type,CallByRef)] -> Either TyErr [(Ident,Type,CallByRef)] 126 | resolveParamsType env params = 127 | mapM (\(x,t,b) -> lookupType env t >>= \r -> Right (x,r,b)) params 128 | 129 | typechkConstDecl :: Env -> ConstDecl -> Either TyErr Env 130 | typechkConstDecl env _ = undefined 131 | 132 | typechkDeclFunc :: Env -> Decl -> Either TyErr Env 133 | typechkDeclFunc env (DeclFunc x params t b) = 134 | resolveParamsType env params >>= \resParams -> -- replace params type aliases with their real types 135 | let sig = getSig (DeclFunc x resParams t b) in 136 | (addFunc env sig) >>= \e -> 137 | typechkDecls (newBlock e) (map (\(i,ty,_) -> DeclVar [(i, ty)]) resParams) >>= \e2 -> 138 | typechkBlock e2 b >> Right e 139 | 140 | 141 | typechkStatement :: Env -> Statement -> Either TyErr Env 142 | typechkStatement env (Assignment (Designator x _) expr) = 143 | gettype env expr >>= \t -> lookupVar env x >>= \xtype -> 144 | if xtype == t 145 | || (xtype == TYstr && t == TYchar) 146 | || (xtype == TYreal && t == TYint) 147 | then Right env 148 | else Left $ TypeMismatch xtype t 149 | 150 | typechkStatement env (StatementIf expr s1 ms2) = 151 | gettype env expr >>= \t -> 152 | if t /= TYbool 153 | then Left $ TypeMismatch TYbool t 154 | else let tchk1 = typechkStatement env s1 155 | mtchk2 = (typechkStatement env) <$> ms2 156 | in case mtchk2 of 157 | Just tchk2 -> tchk1 >> tchk2 >> Right env 158 | Nothing -> tchk1 >> Right env 159 | 160 | typechkStatement env (StatementFor i x1 _ x2 s) = -- todo: add i to s's env? 161 | lookupVar env i >>= \t -> 162 | if (t /= TYint) && (t /= TYchar) 163 | then Left $ TypeMismatchOrd t 164 | else gettype env x1 >>= \t1 -> 165 | if t1 /= t 166 | then Left $ TypeMismatch t t1 167 | else gettype env x2 >>= \t2 -> 168 | if t2 /= t 169 | then Left $ TypeMismatch t t2 170 | else typechkStatement env s 171 | 172 | typechkStatement env (StatementWhile expr s) = 173 | gettype env expr >>= \t -> 174 | if t /= TYbool then Left $ CondTypeMismatch t 175 | else typechkStatement env s 176 | 177 | typechkStatement env StatementEmpty = Right env 178 | 179 | typechkStatement env (StatementSeq xs) = 180 | foldr (>>) (Right env) (map (typechkStatement env) xs) 181 | 182 | typechkStatement env (StatementWrite xs) = 183 | foldr (>>) (Right env) (map (gettype env) xs) 184 | typechkStatement env (StatementWriteLn xs) = 185 | typechkStatement env (StatementWrite xs) 186 | 187 | typechkStatement env (ProcCall x args) = lookupFun env x >>= 188 | \(t, formalTs) -> 189 | if length formalTs /= length args 190 | then Left $ ArgCountMismatch (length formalTs) 191 | else case foldr (>>) (Right t) (zipWith (matchArgFormal env) args formalTs) 192 | of Right _ -> Right env 193 | Left err -> Left err 194 | 195 | typechkStatement env _ = Right env -- todo 196 | 197 | -- check if argument matches expected formal parameter 198 | matchArgFormal :: Env -> Expr -> (Type, Bool) -> Either TyErr Type 199 | matchArgFormal env expr (ty, callbyref) = gettype env expr >>= \exprT -> 200 | if callbyref 201 | then -- has to be a Designator of the exact same type 202 | if exprT /= ty 203 | then Left $ ArgTypeMismatch ty exprT 204 | else if (not $ isFactorDesig expr) && callbyref -- a CallByRef argument has to be a FactorDesig 205 | then Left $ VaraibleArgExpected expr 206 | else Right ty 207 | else -- typecasts 208 | if ty == exprT 209 | || (ty == TYreal && exprT == TYint) 210 | || (ty == TYstr && exprT == TYchar) 211 | then Right ty 212 | else Left $ ArgTypeMismatch ty exprT 213 | where isFactorDesig a = case a of 214 | FactorDesig _ -> True 215 | _ -> False 216 | 217 | gettype :: Env -> Expr -> Either TyErr Type 218 | gettype env FactorTrue = Right TYbool 219 | gettype env FactorFalse = Right TYbool 220 | gettype env (FactorInt _) = Right TYint 221 | gettype env (FactorReal _) = Right TYreal 222 | gettype env (FactorStr _) = Right TYstr 223 | gettype env (FactorChar _) = Right TYchar 224 | gettype env (FactorNot x) = undefined 225 | 226 | gettype env (FuncCall x args) = lookupFun env x >>= 227 | \(t, formalTs') -> let formalTs = tail formalTs' in -- discard the dummy formal parameter 228 | if length formalTs /= length args 229 | then Left $ ArgCountMismatch (length formalTs) 230 | else foldr (>>) (Right t) (zipWith (matchArgFormal env) args formalTs) 231 | 232 | gettype env (FactorDesig (Designator x _)) = 233 | lookupVar env x 234 | 235 | gettype env (Unary op x) = 236 | gettype env x >>= \t -> 237 | if isNum t then Right t 238 | else Left $ TypeMismatchNum t 239 | 240 | gettype env (Relation x1 op x2) = 241 | t1 >>= \v1 -> t2 >>= \v2 -> 242 | if (v1 == v2) || (isNum v1 && isNum v2) 243 | then Right TYbool 244 | else Left $ TypeMismatch v1 v2 245 | where [t1, t2] = (gettype env) <$> [x1, x2] 246 | 247 | gettype env (Add x1 op x2) 248 | | op `elem` [OPplus, OPminus] = 249 | t1 >>= \v1 -> t2 >>= \v2 -> 250 | if not (isNum v1 && isNum v2) 251 | then Left $ TypeMismatchNum (if isNum v1 then v1 else v2) 252 | else if v1 == TYreal then t1 else t2 253 | | otherwise = 254 | t1 >>= \v1 -> t2 >>= \v2 -> 255 | if (v1 /= TYbool) || (v2 /= TYbool) 256 | then Left $ TypeMismatch TYbool (if v1 /= TYbool then v1 else v2) 257 | else t1 258 | where [t1, t2] = (gettype env) <$> [x1, x2] 259 | 260 | gettype env (Mult x1 op x2) 261 | | op == OPstar = 262 | t1 >>= \v1 -> t2 >>= \v2 -> 263 | if not (isNum v1 && isNum v2) 264 | then Left $ TypeMismatchNum (if isNum v1 then v1 else v2) 265 | else if v1 == TYreal then t1 else t2 266 | | op == OPdiv = 267 | t1 >>= \v1 -> t2 >>= \v2 -> 268 | if not (isNum v1 && isNum v2) 269 | then Left $ TypeMismatchNum (if isNum v1 then v1 else v2) 270 | else Right TYreal 271 | | op `elem` [OPmod, OPidiv] = 272 | t1 >>= \v1 -> t2 >>= \v2 -> 273 | if not (v1 == TYint && v2 == TYint) 274 | then Left $ TypeMismatch TYint (if v1 == TYint then v1 else v2) 275 | else Right TYint 276 | | otherwise = 277 | t1 >>= \v1 -> t2 >>= \v2 -> 278 | if (v1 /= TYbool) || (v2 /= TYbool) 279 | then Left $ TypeMismatch TYbool (if v1 /= TYbool then v1 else v2) 280 | else t1 281 | where [t1, t2] = (gettype env) <$> [x1, x2] 282 | 283 | typechkStr s env = case p' parseStatement s of 284 | Right st -> typechkStatement env st 285 | Left err -> error $ "Parse error:\n\t" ++ s ++ "\nin:" ++ show err 286 | -------------------------------------------------------------------------------- /src/Emit.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Emit where 4 | 5 | import qualified Intermediate as IR 6 | import TypeCheck (isNum) 7 | import qualified Grammar as G ( Program, Type(..) ) 8 | import Grammar (OP(..), Ident, IdentList, 9 | VarDecl, TypeDecl, CallByRef, ToDownTo) 10 | 11 | import Control.Monad.Except hiding (void) 12 | import Control.Applicative 13 | 14 | import Utils (p') 15 | import Paskell as P 16 | 17 | import LLVM.AST 18 | import LLVM.AST.AddrSpace 19 | import LLVM.AST.Attribute (ParameterAttribute) 20 | import qualified LLVM.AST as AST 21 | import qualified LLVM.AST.Constant as C 22 | import qualified LLVM.AST.Float as F 23 | import qualified LLVM.AST.IntegerPredicate as IP 24 | import qualified LLVM.AST.FloatingPointPredicate as FP 25 | 26 | import LLVM.AST.Global 27 | import LLVM.Context 28 | import LLVM.Module 29 | import LLVM.AST.ParameterAttribute 30 | import qualified Data.ByteString.Char8 as BS 31 | import Data.ByteString.Short hiding (length) 32 | import qualified ConvertIR as Conv 33 | import qualified Intermediate as IR 34 | import Codegen 35 | 36 | 37 | ------------------------------------------------------------------------------- 38 | -- Type conversions 39 | ------------------------------------------------------------------------------- 40 | 41 | toShortBS = toShort . BS.pack 42 | toString = BS.unpack . fromShort 43 | name' = Name . toShortBS 44 | 45 | toLLVMType :: G.Type -> Type 46 | toLLVMType t = case t of 47 | G.TYint -> int 48 | G.TYbool -> bool 49 | G.TYreal -> double 50 | G.Void -> void 51 | G.TYstr -> str 52 | G.TYchar -> str 53 | G.TYptr t -> PointerType (toLLVMType t) (AddrSpace 0) 54 | _ -> error $ "TYident wasn't resolved. " ++ (show t) 55 | 56 | -- add ParameterAtribute to an argument given the argument and 57 | -- it's corrosponding Operand 58 | addParamAttr :: IR.Expr -> Operand -> (Operand, [ParameterAttribute]) 59 | addParamAttr expr oper = let ty = IR.getType expr in 60 | case ty of 61 | G.TYptr t -> (oper, [Dereferenceable (typeSize t)]) 62 | _ -> (oper, []) 63 | 64 | typeSize ty = case ty of 65 | G.TYreal -> 8 66 | G.TYint -> 4 67 | _ -> 1 68 | 69 | isPtrPtr :: Operand -> Bool -- checks for a double pointer 70 | isPtrPtr oper = case oper of 71 | LocalReference (PointerType (PointerType _ _) _) _ -> True 72 | _ -> False 73 | 74 | ------------------------------------------------------------------------------- 75 | -- Top-level interface 76 | ------------------------------------------------------------------------------- 77 | 78 | liftError :: ExceptT String IO a -> IO a 79 | liftError = runExceptT >=> either fail return 80 | 81 | -- LLVM-IR and LLVM-AST given IR 82 | codegen :: AST.Module -> IR.Program -> IO (AST.Module, String) 83 | codegen mod pr = withContext $ \context -> 84 | liftIO $ withModuleFromAST context newast $ \m -> 85 | do llstr <- moduleLLVMAssembly m 86 | return (newast, BS.unpack llstr) 87 | where newast = runLLVM mod (genProgram pr) 88 | 89 | -- LLVM-IR given parse tree 90 | printllvm :: G.Program -> IO String 91 | printllvm ast = let ir = Conv.convProgram ast in 92 | do (llvmast, llstr) <- codegen (emptyModule "MainModule") ir 93 | return llstr 94 | 95 | ------------------------------------------------------------------------------- 96 | -- Declarations and Blocks 97 | ------------------------------------------------------------------------------- 98 | 99 | genDeclFunc :: IR.Decl -> LLVM () 100 | genDeclFunc (IR.DeclFunc x args retty blk _) = do 101 | define (toLLVMType retty) (toShortBS x) (toSig args) body 102 | where 103 | toSig xs = map (\(a,b,c) -> (toLLVMType (if c then G.TYptr b else b), name' a, if c then [Dereferenceable (typeSize b)] else [])) xs 104 | body = do 105 | entry' <- addBlock "entry" 106 | setBlock entry' 107 | forM args $ \(i,t,byref) -> do 108 | var <- alloca' $ toLLVMType $ if byref then G.TYptr t else t 109 | store var (local (toLLVMType t) (name' i)) 110 | assign (toShortBS i) var 111 | defs <- genBlock blk 112 | -- return dummy variable value for functions, or void for procedures 113 | if retty == G.Void 114 | then retvoid 115 | else getvar (toShortBS x) (toLLVMType retty) 116 | >>= load (toLLVMType retty) >>= ret 117 | return defs 118 | 119 | -- used for local var decls only 120 | genDeclVar :: IR.Decl -> Codegen () 121 | genDeclVar (IR.DeclVar xs _) = do 122 | forM xs $ \(i,t) -> do 123 | var <- alloca (toLLVMType t) 124 | assign (toShortBS i) var 125 | return () 126 | 127 | genDeclGlob :: IR.Decl -> LLVM () 128 | genDeclGlob d@(IR.DeclVar _ _) = genDeclVarGlob d 129 | genDeclGlob d@(IR.DeclFunc _ _ _ _ _) = genDeclFunc d 130 | genDeclGlob _ = return () 131 | 132 | 133 | genDeclVarGlob :: IR.Decl -> LLVM () 134 | genDeclVarGlob (IR.DeclVar xs _) = do 135 | forM xs $ \(i,t) -> gvar (toLLVMType t) (name' i) 136 | return () 137 | 138 | -- generate entry point main() 139 | genMain :: IR.Statement -> LLVM () 140 | genMain s = genDeclFunc (IR.DeclFunc "main" args G.TYint (IR.Block [] s G.Void) G.Void) 141 | where args = [("main", G.TYint, False)] -- dummy return value variable 142 | 143 | genProgram :: IR.Program -> LLVM () 144 | genProgram (IR.Program p (IR.Block ds s _) _) = do 145 | addDefn printf 146 | forM ds genDeclGlob 147 | genMain s 148 | 149 | genBlock :: IR.Block -> Codegen [Definition] 150 | genBlock (IR.Block ds s _) = do 151 | forM ds genDeclVar 152 | genStatement s 153 | 154 | ------------------------------------------------------------------------------- 155 | -- Statements 156 | ------------------------------------------------------------------------------- 157 | 158 | genStatement :: IR.Statement -> Codegen [Definition] 159 | genStatement (IR.StatementEmpty) = return [] 160 | genStatement (IR.StatementSeq xs _) = (forM xs genStatement) >>= (return.concat) 161 | genStatement (IR.Assignment (IR.Designator x _ xt) expr _) = do 162 | (rhs, defs) <- genExpr expr 163 | var <- getvar (toShortBS x) (toLLVMType xt) -- var is a pointer 164 | if not (isPtrPtr var) 165 | then store var rhs -- store value at memory referred to by pointer 166 | else do -- if var is a pointer to pointer, this means we have something like *x = 123 and we should derference the pointer first 167 | ptr <- load (toLLVMType G.Void) var 168 | store ptr rhs 169 | return defs 170 | 171 | genStatement (IR.StatementIf expr s1 ms2 _) = do 172 | ifthen <- addBlock "if.then" 173 | ifelse <- addBlock "if.else" 174 | ifexit <- addBlock "if.exit" 175 | 176 | (cond, defs1) <- genExpr expr 177 | cbr cond ifthen ifelse 178 | 179 | -- if.then 180 | setBlock ifthen 181 | defs2 <- genStatement s1 182 | br ifexit 183 | getBlock 184 | 185 | -- if.else 186 | setBlock ifelse 187 | defs3 <- genStatement s2 188 | br ifexit 189 | getBlock 190 | 191 | -- if.exit 192 | setBlock ifexit 193 | return $ defs1 ++ defs2 ++ defs3 194 | where s2 = case ms2 of 195 | Nothing -> IR.StatementEmpty 196 | Just x -> x 197 | 198 | genStatement (IR.StatementFor x expr1 todownto expr2 s _) = do 199 | ftest <- addBlock "for.test" 200 | fbody <- addBlock "for.body" 201 | fstep <- addBlock "for.step" 202 | fexit <- addBlock "for.exit" 203 | 204 | defs1 <- genStatement (IR.Assignment loopvar expr1 G.Void) 205 | br ftest 206 | 207 | -- for.test 208 | setBlock ftest 209 | (cond, defs2) <- genExpr (IR.Relation varfactor optest expr2 G.TYbool) 210 | cbr cond fbody fexit 211 | 212 | -- for.body 213 | setBlock fbody 214 | defs3 <- genStatement s 215 | br fstep 216 | 217 | -- for.step 218 | setBlock fstep 219 | -- todo implment Char stepping 220 | defs4 <- genStatement (IR.Assignment loopvar step G.Void) 221 | br ftest 222 | 223 | -- for.exit 224 | setBlock fexit 225 | return $ defs1 ++ defs2 ++ defs3 ++ defs4 226 | where loopvar = IR.Designator x [] (IR.getType expr1) 227 | varfactor = IR.FactorDesig loopvar (IR.getType expr1) 228 | (optest, opstep) = if todownto then (OPle, OPplus) else (OPge, OPminus) 229 | step = IR.Add varfactor opstep (IR.FactorInt 1 G.TYint) (IR.getType expr1) 230 | 231 | 232 | genStatement (IR.StatementWhile expr s _) = do 233 | wtest <- addBlock "while.test" 234 | wbody <- addBlock "while.body" 235 | wexit <- addBlock "while.exit" 236 | br wtest 237 | 238 | -- entry 239 | setBlock wtest 240 | (cond, defs1) <- genExpr expr 241 | _ <- cbr cond wbody wexit 242 | 243 | -- body 244 | _ <- setBlock wbody 245 | defs2 <- genStatement s 246 | br wtest 247 | 248 | -- exit 249 | _ <- setBlock wexit 250 | return $ defs1 ++ defs2 251 | 252 | genStatement (IR.ProcCall f xs t) = do 253 | (args, defs) <- mapM genExpr xs >>= (return.unzip) 254 | call' (externf fnty (name' f)) (zipWith addParamAttr xs args) 255 | return $ concat defs 256 | where fnty = toLLVMfnType (toLLVMType t) (map (toLLVMType . IR.getType) xs) 257 | 258 | genStatement (IR.StatementWrite xs' _) = do 259 | (args, defs) <- mapM genExpr xs >>= (return.unzip) 260 | -- error $ show $ (zipWith addParamAttr xs args) 261 | callNoCast (externf printfTy (name' "printf")) (zipWith addParamAttr xs args) 262 | return $ concat defs 263 | where fstr = (foldr (++) "" (map (formatstr. IR.getType) xs')) ++ "\00" 264 | xs = (IR.FactorStr fstr G.TYstr) : xs' -- add printf format string to arguments 265 | 266 | formatstr :: G.Type -> String 267 | formatstr G.TYint = "%d" 268 | formatstr G.TYstr = "%s" 269 | formatstr G.TYreal = "%f" 270 | formatstr G.TYbool = "%d" 271 | formatstr G.TYchar = "%s" 272 | 273 | ------------------------------------------------------------------------------- 274 | -- Expressions 275 | ------------------------------------------------------------------------------- 276 | 277 | -- returns %x for final expression value, and stores any intermediate instructions in the block 278 | genExpr :: IR.Expr -> Codegen (Operand, [Definition]) 279 | genExpr (IR.FactorInt x _) = return (cons $ C.Int 32 (fromIntegral x), []) 280 | genExpr (IR.FactorReal x _) = return (cons $ C.Float (F.Double x), []) 281 | genExpr (IR.FactorStr x _) = do 282 | strglobal <- freshStrName 283 | def <- return $ gstrVal' (name' strglobal) x' 284 | (ConstantOperand ptr) <- getvar (toShortBS strglobal) (charArrType $ length x') 285 | oper <- return $ cons $ C.GetElementPtr True ptr [C.Int 32 0, C.Int 32 0] 286 | return (oper, [def]) 287 | where x' = if last x /= '\00' then x ++ "\00" else x 288 | genExpr (IR.FactorTrue _) = return (cons $ C.Int 1 1, []) 289 | genExpr (IR.FactorFalse _) = return (cons $ C.Int 1 0, []) 290 | genExpr (IR.Relation x1 op x2 _) = let 291 | (t1,t2) = (IR.getType x1, IR.getType x2) 292 | cmpFloat y1 y2 = do 293 | fy1 <- if t1 == G.TYint then sitofp double y1 else return y1 294 | fy2 <- if t2 == G.TYint then sitofp double y2 else return y2 295 | case op of 296 | OPless -> fcmp FP.OLT fy1 fy2 297 | OPle -> fcmp FP.OLE fy1 fy2 298 | OPgreater -> fcmp FP.OGT fy1 fy2 299 | OPge -> fcmp FP.OGE fy1 fy2 300 | OPeq -> fcmp FP.OEQ fy1 fy2 301 | OPneq -> fcmp FP.ONE fy1 fy2 302 | cmpInt y1 y2 = do 303 | case op of 304 | OPless -> icmp IP.SLT y1 y2 305 | OPle -> icmp IP.SLE y1 y2 306 | OPgreater -> icmp IP.SGT y1 y2 307 | OPge -> icmp IP.SGE y1 y2 308 | OPeq -> icmp IP.EQ y1 y2 309 | OPneq -> icmp IP.NE y1 y2 310 | cmp = if t1 == G.TYreal || t2 == G.TYreal 311 | then cmpFloat 312 | else if t1 `elem` [G.TYint, G.TYbool] || t2 `elem` [G.TYint,G.TYbool] 313 | then cmpInt -- int and bool 314 | else undefined -- todo: other cases 315 | in do 316 | (y1, defs1) <- genExpr x1 317 | (y2, defs2) <- genExpr x2 318 | oper <- cmp y1 y2 319 | return (oper, defs1 ++ defs2) 320 | 321 | genExpr (IR.Add x1 op x2 t) = do 322 | (y1, defs1) <- genExpr x1 323 | (y2, defs2) <- genExpr x2 324 | oper <- case t of 325 | G.TYbool -> bor y1 y2 326 | G.TYint -> (if op == OPplus then iadd else isub) y1 y2 327 | G.TYreal -> do 328 | fy1 <- if IR.getType x1 == G.TYint then sitofp double y1 else return y1 329 | fy2 <- if IR.getType x2 == G.TYint then sitofp double y2 else return y2 330 | (if op == OPplus then fadd else fsub) fy1 fy2 331 | return (oper, defs1 ++ defs2) 332 | 333 | genExpr (IR.Mult x1 op x2 t) = do 334 | let (t1,t2) = (IR.getType x1, IR.getType x2) 335 | (y1, defs1) <- genExpr x1 336 | (y2, defs2) <- genExpr x2 337 | oper <- case op of 338 | OPdiv -> do 339 | fy1 <- sitofp double y1 340 | fy2 <- sitofp double y2 341 | fdiv fy1 fy2 342 | OPstar -> do 343 | fy1 <- if t1 == G.TYint then sitofp double y1 else return y1 344 | fy2 <- if t2 == G.TYint then sitofp double y2 else return y2 345 | if t1 == G.TYreal || t2 == G.TYreal 346 | then fmul fy1 fy2 347 | else imul y1 y2 348 | OPidiv -> idiv y1 y2 349 | OPmod -> imod y1 y2 350 | OPand -> band y1 y2 351 | return (oper, defs1 ++ defs2) 352 | 353 | genExpr (IR.Unary op x t) = do 354 | (y, defs) <- genExpr x 355 | oper <- case op of 356 | OPor -> undefined 357 | OPplus -> return y 358 | OPminus -> fst <$> (genExpr $ IR.Add (IR.FactorInt 0 G.TYint) op x t) 359 | return (oper, defs) 360 | 361 | genExpr (IR.FuncCall f xs t) = do 362 | (args, defs) <- mapM genExpr xs >>= (return.unzip) 363 | oper <- call (externf fnty (name' f)) (zipWith addParamAttr xs args) 364 | return (oper, concat defs) 365 | where fnty = toLLVMfnType (toLLVMType t) (map (toLLVMType . IR.getType) xs) 366 | -- error $ (show $ map IR.getType xs) 367 | 368 | genExpr (IR.FactorDesig (IR.Designator x _ xt) dt) = 369 | (getvar (toShortBS x) (toLLVMType xt)) 370 | >>= (if dt==xt then load (toLLVMType dt) else return . id) 371 | >>= \oper -> return (oper, []) -------------------------------------------------------------------------------- /src/Codegen.hs: -------------------------------------------------------------------------------- 1 | -- Based on Stephen Diehl's work 2 | 3 | {-# LANGUAGE OverloadedStrings #-} 4 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 5 | 6 | module Codegen where 7 | 8 | import Data.ByteString.Short hiding (length) 9 | import Data.Monoid ((<>)) 10 | import Data.Word 11 | import Data.String 12 | import Data.List 13 | import Data.Function 14 | import qualified Data.Map as Map 15 | 16 | import Control.Applicative 17 | import Control.Monad.State 18 | 19 | import LLVM.AST 20 | import LLVM.AST.Typed (typeOf) 21 | import LLVM.AST.AddrSpace 22 | import LLVM.AST.Type hiding (double) 23 | import LLVM.AST.Global as G 24 | import qualified LLVM.AST as AST 25 | 26 | import qualified LLVM.AST.Linkage as L 27 | import qualified LLVM.AST.Constant as C 28 | import qualified LLVM.AST.Attribute as A 29 | import qualified LLVM.AST.CallingConvention as CC 30 | import qualified LLVM.AST.FloatingPointPredicate as FP 31 | import qualified LLVM.AST.IntegerPredicate as IP 32 | 33 | import LLVM.Context 34 | import LLVM.Module 35 | import qualified Data.ByteString.Char8 as BS 36 | 37 | import Data.Char (ord) 38 | 39 | ------------------------------------------------------------------------------- 40 | -- Module Level 41 | ------------------------------------------------------------------------------- 42 | 43 | newtype LLVM a = LLVM (State AST.Module a) 44 | deriving (Functor, Applicative, Monad, MonadState AST.Module ) 45 | 46 | runLLVM :: AST.Module -> LLVM a -> AST.Module 47 | runLLVM mod (LLVM m) = execState m mod 48 | 49 | emptyModule :: ShortByteString -> AST.Module 50 | emptyModule label = defaultModule { moduleName = label } 51 | 52 | addDefns :: [Definition] -> LLVM () 53 | addDefns ds = do 54 | forM ds $ \d -> 55 | addDefn d 56 | return () 57 | 58 | addDefn :: Definition -> LLVM () 59 | addDefn d = do 60 | defs <- gets moduleDefinitions 61 | modify $ \s -> s { moduleDefinitions = defs ++ [d] } 62 | 63 | define :: Type -> ShortByteString -> [(Type, Name, [A.ParameterAttribute])] -> Codegen [Definition] -> LLVM () 64 | define retty label argtys body = (addDefns bodydefs) >> addDefn ( 65 | GlobalDefinition $ functionDefaults { 66 | name = Name label 67 | , parameters = ([Parameter ty nm att | (ty, nm, att) <- argtys], False) 68 | , returnType = retty 69 | , basicBlocks = bls 70 | }) 71 | where 72 | (bodydefs, bodystate) = runStateCodegen body (label) 73 | bls = createBlocks $ bodystate 74 | 75 | external :: Type -> ShortByteString -> [(Type, Name)] -> LLVM () 76 | external retty label argtys = addDefn $ 77 | GlobalDefinition $ functionDefaults { 78 | name = Name label 79 | , linkage = L.External 80 | , parameters = ([Parameter ty nm [] | (ty, nm) <- argtys], False) 81 | , returnType = retty 82 | , basicBlocks = [] 83 | } 84 | 85 | gvar :: Type -> Name -> LLVM () 86 | gvar ty name = addDefn $ gvar' ty name 87 | 88 | gvar' :: Type -> Name -> Definition 89 | gvar' ty name = 90 | GlobalDefinition globalVariableDefaults 91 | { name = name 92 | , G.type' = ty 93 | , linkage = L.Weak 94 | , initializer = Just $ C.Null ty 95 | } 96 | 97 | gstrVal :: Name -> String -> LLVM () 98 | gstrVal name val = addDefn $ gstrVal' name val 99 | 100 | -- String literals are declared in global scope 101 | gstrVal' :: Name -> String -> Definition 102 | gstrVal' name val = 103 | GlobalDefinition globalVariableDefaults 104 | { name = name 105 | , G.type' = charArrType (length val) 106 | , linkage = L.Private 107 | , unnamedAddr = Just GlobalAddr 108 | , isConstant = True 109 | , initializer = Just $ C.Array (IntegerType 8) (map constchar val) 110 | } 111 | where constchar c = C.Int 8 (toInteger $ ord c) 112 | 113 | printf :: Definition 114 | printf = GlobalDefinition $ functionDefaults 115 | { returnType = int 116 | , name = Name "printf" 117 | , parameters = ([Parameter str (UnName 0) []], True) 118 | } 119 | 120 | printfTy :: Type 121 | printfTy = PointerType {pointerReferent = (FunctionType int [str] True), 122 | pointerAddrSpace = AddrSpace 0} 123 | 124 | -- construct function type given ret type and signature 125 | toLLVMfnType :: Type -> [Type] -> Type 126 | toLLVMfnType t ts = PointerType {pointerReferent = (FunctionType t ts False), 127 | pointerAddrSpace = AddrSpace 0} 128 | 129 | fnPtr :: Name -> LLVM Type 130 | fnPtr nm = findType <$> gets moduleDefinitions 131 | where 132 | findType defs = 133 | case fnDefByName of 134 | [] -> error $ "Undefined function: " ++ show nm 135 | [fn] -> PointerType (typeOf fn) (AddrSpace 0) 136 | _ -> error $ "Ambiguous function name: " ++ show nm 137 | where 138 | globalDefs = [g | GlobalDefinition g <- defs] 139 | fnDefByName = [f | f@(Function { name = nm' }) <- globalDefs, nm' == nm] 140 | 141 | --------------------------------------------------------------------------------- 142 | -- Types 143 | ------------------------------------------------------------------------------- 144 | 145 | -- IEEE 754 double 146 | double :: Type 147 | double = FloatingPointType DoubleFP 148 | 149 | void :: Type 150 | void = AST.VoidType 151 | 152 | int :: Type 153 | int = IntegerType 32 154 | 155 | bool :: Type 156 | bool = IntegerType 1 157 | 158 | str :: Type 159 | str = PointerType (IntegerType 8) (AddrSpace 0) 160 | 161 | charArrType :: Int -> Type 162 | charArrType len = ArrayType (fromIntegral $ len) (IntegerType 8) 163 | 164 | ------------------------------------------------------------------------------- 165 | -- Names 166 | ------------------------------------------------------------------------------- 167 | 168 | type Names = Map.Map ShortByteString Int 169 | 170 | uniqueName :: ShortByteString -> Names -> (ShortByteString, Names) 171 | uniqueName nm ns = 172 | case Map.lookup nm ns of 173 | Nothing -> (nm, Map.insert nm 1 ns) 174 | Just ix -> (nm <> fromString (show ix), Map.insert nm (ix+1) ns) 175 | 176 | ------------------------------------------------------------------------------- 177 | -- Codegen State 178 | ------------------------------------------------------------------------------- 179 | 180 | type SymbolTable = [(ShortByteString, Operand)] 181 | 182 | data CodegenState 183 | = CodegenState { 184 | fnName :: Name 185 | , currentBlock :: Name -- Name of the active block to append to 186 | , blocks :: Map.Map Name BlockState -- Blocks for function 187 | , symtab :: SymbolTable -- Function scope symbol table 188 | , blockCount :: Int -- Count of basic blocks 189 | , count :: Word -- Count of unnamed instructions 190 | , names :: Names -- Name Supply 191 | } deriving Show 192 | 193 | data BlockState 194 | = BlockState { 195 | idx :: Int -- Block index 196 | , stack :: [Named Instruction] -- Stack of instructions 197 | , term :: Maybe (Named Terminator) -- Block terminator 198 | } deriving Show 199 | 200 | ------------------------------------------------------------------------------- 201 | -- Codegen Operations 202 | ------------------------------------------------------------------------------- 203 | 204 | newtype Codegen a = Codegen { runCodegen :: State CodegenState a } 205 | deriving (Functor, Applicative, Monad, MonadState CodegenState ) 206 | 207 | sortBlocks :: [(Name, BlockState)] -> [(Name, BlockState)] 208 | sortBlocks = sortBy (compare `on` (idx . snd)) 209 | 210 | createBlocks :: CodegenState -> [BasicBlock] 211 | createBlocks m = map makeBlock $ sortBlocks $ Map.toList (blocks m) 212 | 213 | makeBlock :: (Name, BlockState) -> BasicBlock 214 | makeBlock (l, (BlockState _ s t)) = BasicBlock l (reverse s) (maketerm t) 215 | where 216 | maketerm (Just x) = x 217 | maketerm Nothing = error $ "Block has no terminator: " ++ (show l) 218 | 219 | entryBlockName :: ShortByteString 220 | entryBlockName = "entry" 221 | 222 | emptyBlock :: Int -> BlockState 223 | emptyBlock i = BlockState i [] Nothing 224 | 225 | -- emptyCodegen :: CodegenState 226 | -- emptyCodegen = CodegenState (Name "block") (Name entryBlockName) Map.empty [] 1 0 Map.empty 227 | 228 | emptyCodegen' :: ShortByteString -> CodegenState 229 | emptyCodegen' name = CodegenState (Name name) (Name entryBlockName) Map.empty [] 1 0 Map.empty 230 | 231 | -- execCodegen :: Codegen a -> CodegenState 232 | -- execCodegen m = execState (runCodegen m) emptyCodegen 233 | 234 | runStateCodegen :: Codegen a -> ShortByteString -> (a, CodegenState) 235 | runStateCodegen m name = runState (runCodegen m) (emptyCodegen' name) 236 | 237 | -- increase instructions count. For unique names 238 | fresh :: Codegen Word 239 | fresh = do 240 | i <- gets count -- (codegen, word codegen) 241 | modify $ \s -> s { count = 1 + i } -- (codegen,()) 242 | return $ i + 1 243 | 244 | freshStrName :: Codegen String 245 | freshStrName = do 246 | n <- fresh 247 | (Name bname) <- getBlock 248 | return $ "str." ++ (BS.unpack.fromShort $ bname) ++ "." ++ (show n) 249 | 250 | instr :: Type -> Instruction -> Codegen (Operand) 251 | instr ty ins = do 252 | n <- fresh 253 | let ref = (UnName n) 254 | blk <- current 255 | let i = stack blk 256 | modifyBlock (blk { stack = (ref := ins) : i } ) -- add named instruction to current block's stack 257 | return $ local ty ref 258 | 259 | unnminstr :: Instruction -> Codegen () 260 | unnminstr ins = do 261 | blk <- current 262 | let i = stack blk 263 | modifyBlock (blk { stack = (Do ins) : i } ) 264 | 265 | terminator :: Named Terminator -> Codegen (Named Terminator) 266 | terminator trm = do 267 | blk <- current 268 | modifyBlock (blk { term = Just trm }) 269 | return trm 270 | 271 | ------------------------------------------------------------------------------- 272 | -- Block Stack 273 | ------------------------------------------------------------------------------- 274 | 275 | entry :: Codegen Name 276 | entry = gets currentBlock 277 | 278 | -- given block name 279 | addBlock :: ShortByteString -> Codegen Name 280 | addBlock bname = do 281 | (Name fname) <- getFnName 282 | bls <- gets blocks 283 | ix <- gets blockCount 284 | nms <- gets names 285 | let new = emptyBlock ix 286 | (qname, supply) = uniqueName (toShortBS $ (toString fname) ++ "." ++ (toString bname)) nms 287 | modify $ \s -> s { blocks = Map.insert (Name qname) new bls 288 | , blockCount = ix + 1 289 | , names = supply 290 | } 291 | return (Name qname) 292 | where toShortBS = toShort . BS.pack 293 | toString = BS.unpack . fromShort 294 | 295 | setBlock :: Name -> Codegen Name 296 | setBlock bname = do 297 | modify $ \s -> s { currentBlock = bname } 298 | return bname 299 | 300 | getBlock :: Codegen Name 301 | getBlock = gets currentBlock 302 | 303 | getFnName :: Codegen Name 304 | getFnName = gets fnName 305 | 306 | modifyBlock :: BlockState -> Codegen () 307 | modifyBlock new = do 308 | active <- gets currentBlock 309 | modify $ \s -> s { blocks = Map.insert active new (blocks s) } 310 | 311 | current :: Codegen BlockState 312 | current = do 313 | c <- gets currentBlock 314 | blks <- gets blocks 315 | case Map.lookup c blks of 316 | Just x -> return x 317 | Nothing -> error $ "No such block: " ++ show c 318 | 319 | ------------------------------------------------------------------------------- 320 | -- Symbol Table 321 | ------------------------------------------------------------------------------- 322 | 323 | assign :: ShortByteString -> Operand -> Codegen () 324 | assign var x = do 325 | lcls <- gets symtab 326 | modify $ \s -> s { symtab = [(var, x)] ++ lcls } 327 | 328 | getvar :: ShortByteString -> Type -> Codegen Operand 329 | getvar var ty = do 330 | syms <- gets symtab 331 | case lookup var syms of 332 | Just x -> return x 333 | Nothing -> return $ getGvar var ty -- if not in symtab then it's a global, or doesn't exist 334 | -- error $ "unkown variable" ++ show var 335 | 336 | getGvar :: ShortByteString -> Type -> Operand 337 | getGvar var ty = ConstantOperand $ global (PointerType ty (AddrSpace 0)) (Name var) 338 | 339 | ------------------------------------------------------------------------------- 340 | 341 | local :: Type -> Name -> Operand 342 | local = LocalReference 343 | 344 | global :: Type -> Name -> C.Constant 345 | global = C.GlobalReference 346 | 347 | externf :: Type -> Name -> Operand 348 | externf ty nm = ConstantOperand (C.GlobalReference ty nm) 349 | 350 | -- Arithmetic and Constants 351 | fadd :: Operand -> Operand -> Codegen Operand 352 | fadd a b = instr float $ FAdd NoFastMathFlags a b [] 353 | 354 | fsub :: Operand -> Operand -> Codegen Operand 355 | fsub a b = instr float $ FSub NoFastMathFlags a b [] 356 | 357 | fmul :: Operand -> Operand -> Codegen Operand 358 | fmul a b = instr float $ FMul NoFastMathFlags a b [] 359 | 360 | fdiv :: Operand -> Operand -> Codegen Operand 361 | fdiv a b = instr float $ FDiv NoFastMathFlags a b [] 362 | 363 | fcmp :: FP.FloatingPointPredicate -> Operand -> Operand -> Codegen Operand 364 | fcmp cond a b = instr float $ FCmp cond a b [] 365 | 366 | icmp :: IP.IntegerPredicate -> Operand -> Operand -> Codegen Operand 367 | icmp cond a b = instr bool $ ICmp cond a b [] 368 | 369 | cons :: C.Constant -> Operand 370 | cons = ConstantOperand 371 | 372 | uitofp :: Type -> Operand -> Codegen Operand 373 | uitofp ty a = instr float $ UIToFP a ty [] 374 | 375 | sitofp :: Type -> Operand -> Codegen Operand 376 | sitofp ty a = instr float $ SIToFP a ty [] 377 | 378 | nowrap :: Bool 379 | nowrap = False 380 | 381 | iadd :: Operand -> Operand -> Codegen Operand 382 | iadd a b = instr int $ Add nowrap nowrap a b [] 383 | 384 | imul :: Operand -> Operand -> Codegen Operand 385 | imul a b = instr int $ Mul nowrap nowrap a b [] 386 | 387 | isub :: Operand -> Operand -> Codegen Operand 388 | isub a b = instr int $ Sub nowrap nowrap a b [] 389 | 390 | idiv :: Operand -> Operand -> Codegen Operand 391 | idiv a b = instr int $ SDiv False a b [] 392 | 393 | imod :: Operand -> Operand -> Codegen Operand 394 | imod a b = instr int $ SRem a b [] 395 | 396 | band :: Operand -> Operand -> Codegen Operand 397 | band a b = instr (IntegerType 1) $ And a b [] 398 | 399 | bor :: Operand -> Operand -> Codegen Operand 400 | bor a b = instr (IntegerType 1) $ Or a b [] 401 | 402 | -- toArgs :: [Operand] -> [(Operand, [A.ParameterAttribute])] 403 | -- toArgs = map (\x -> (x, [])) 404 | 405 | -- Effects 406 | call :: Operand -> [(Operand, [A.ParameterAttribute])] -> Codegen Operand 407 | call fn args = do -- figure out the signature, and typecast args as necessary 408 | let (opers, attrs) = unzip args 409 | ts = extractParams fn 410 | tcastOpers <- mapM tycast (zip ts opers) 411 | -- error $ show tcastOpers 412 | instr (extractFnRetType fn) $ Call Nothing CC.C [] (Right fn) (zip tcastOpers attrs) [] [] 413 | 414 | callNoCast :: Operand -> [(Operand, [A.ParameterAttribute])] -> Codegen Operand 415 | callNoCast fn args = instr (extractFnRetType fn) $ Call Nothing CC.C [] (Right fn) args [] [] 416 | 417 | -- UnNamed instruction Call. Used when return type is void 418 | call' :: Operand -> [(Operand, [A.ParameterAttribute])] -> Codegen () 419 | call' fn args = do -- figure out the signature, and typecast args as necessary 420 | let (opers, attrs) = unzip args 421 | ts = extractParams fn 422 | tcastOpers <- mapM tycast (zip ts opers) 423 | unnminstr $ Call Nothing CC.C [] (Right fn) (zip tcastOpers attrs) [] [] 424 | -- unnminstr $ Call Nothing CC.C [] (Right fn) args [] [] 425 | 426 | alloca :: Type -> Codegen Operand 427 | alloca ty = instr ty $ Alloca ty Nothing 0 [] 428 | 429 | -- same as alloca but returns a pointer to given data type (*ty) 430 | alloca' :: Type -> Codegen Operand 431 | alloca' ty = instr (PointerType ty (AddrSpace 0)) $ Alloca ty Nothing 0 [] 432 | 433 | store :: Operand -> Operand -> Codegen () 434 | store ptr val = 435 | if (isFloat' ptr) && (isIntVal $ val) 436 | then (sitofp double val) >>= -- typecast int to real 437 | \val' -> unnminstr $ Store False ptr val' Nothing 0 [] 438 | else unnminstr $ Store False ptr val Nothing 0 [] 439 | 440 | load :: Type -> Operand -> Codegen Operand 441 | load ty ptr = instr ty $ Load False ptr Nothing 0 [] 442 | 443 | -- Control Flow 444 | br :: Name -> Codegen (Named Terminator) 445 | br val = terminator $ Do $ Br val [] 446 | 447 | cbr :: Operand -> Name -> Name -> Codegen (Named Terminator) 448 | cbr cond tr fl = terminator $ Do $ CondBr cond tr fl [] 449 | 450 | phi :: Type -> [(Operand, Name)] -> Codegen Operand 451 | phi ty incoming = instr ty $ Phi ty incoming [] 452 | 453 | ret :: Operand -> Codegen (Named Terminator) 454 | ret val = terminator $ Do $ Ret (Just val) [] 455 | 456 | retvoid :: Codegen (Named Terminator) 457 | retvoid = terminator $ Do $ Ret Nothing [] 458 | 459 | ------------------------------------------------------------------------------- 460 | -- Helpers used for typecasting stuff 461 | ------------------------------------------------------------------------------- 462 | 463 | isIntVal x = case x of 464 | LocalReference (IntegerType _) _ -> True 465 | ConstantOperand (C.Int _ _) -> True 466 | _ -> False 467 | 468 | isFloat x = case x of 469 | (FloatingPointType _) -> True 470 | (PointerType (FloatingPointType _) _) -> True 471 | _ -> False 472 | 473 | isFloat' x = case x of 474 | (LocalReference (FloatingPointType _) _) -> True 475 | (ConstantOperand (C.GlobalReference (PointerType (FloatingPointType _) _) _) ) -> True 476 | _ -> False 477 | 478 | tycast (dst, oper) = 479 | if (isFloat dst) && (isIntVal oper) 480 | then sitofp double oper -- typecast int to real 481 | else return oper 482 | 483 | extractParams (ConstantOperand (C.GlobalReference fn _)) = 484 | case fn of (PointerType (FunctionType _ ts _) _) -> ts 485 | 486 | extractFnRetType (ConstantOperand (C.GlobalReference fn _)) = 487 | case fn of (PointerType (FunctionType t _ _) _) -> t 488 | -------------------------------------------------------------------------------- /test/Test.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main where 4 | 5 | import Text.Parsec 6 | import Text.Parsec.String 7 | import Text.Parsec.Combinator 8 | import Utils 9 | import Paskell 10 | import ExtraParsers 11 | import KeywordParse 12 | import Grammar 13 | import TypeCheck 14 | import qualified ConvertIR as Conv 15 | import qualified Emit as E 16 | import Codegen (emptyModule) 17 | 18 | import Control.Exception 19 | import Control.Monad 20 | import Test.HUnit 21 | 22 | assertException :: (Exception e, Eq e) => e -> IO a -> IO () 23 | assertException ex action = 24 | handleJust isWanted (const $ return ()) $ do 25 | action 26 | assertFailure $ "Expected exception: " ++ show ex 27 | where isWanted = guard . (== ex) 28 | 29 | checkPass :: Parser a -> String -> Bool 30 | checkPass p inp = case p' p inp of Right _ -> True 31 | Left _ -> False 32 | checkFail p inp = not $ checkPass p inp 33 | 34 | test' :: [(Parser a, Bool, String)] -> IO () 35 | test' testcases = do 36 | let res = map (\(p, doPass, inp) -> (if doPass then checkPass else checkFail) p inp) testcases 37 | putStrLn $ "Running " ++ (show $ length res) ++ " tests:" 38 | mapM putStrLn $ (`map` (enumerate res)) 39 | (\(i, x) -> 40 | (show $ i+1) ++ (if x then "... OK" else "... FAILED")) 41 | putStrLn $ "Result: " ++ (show $ length $ filter id res) ++ "/" ++ (show $ length res) ++ " tests passed!" 42 | return () 43 | 44 | -- testParser parseIdent True ["X", "111"] 45 | 46 | testParser p doPass xs = let tuple3 (a,b) c = (a,b,c) 47 | in test' $ zipWith tuple3 (repeat (p, doPass)) xs 48 | 49 | tparseIdent = do 50 | testParser parseIdent True [ 51 | "X1", 52 | "x1", 53 | "x1 ", 54 | "Xyz1 ", 55 | "X1z ", 56 | "xyz", 57 | "xYz ", 58 | "X1@ ", -- should only consume X1 59 | "var123 " ] 60 | testParser parseIdent False [ 61 | "", 62 | "1123", 63 | "1X1 ", 64 | " xyz", 65 | "Var" ] 66 | 67 | tparseKeywords = do 68 | testParser parseKWand True [ 69 | "and", 70 | "ANd", 71 | "and ", 72 | "ANd \n " ] 73 | testParser parseKWand False [ 74 | "and123", 75 | "sand" ] 76 | 77 | tparseIdentList = do 78 | testParser parseIdentList True [ 79 | "x,y", 80 | "x, y ", 81 | "x1 , y2 , Z ", 82 | "var1 , y2 , Z ", 83 | "x1 , y2 , Z" ] 84 | testParser parseIdentList False [ 85 | "", 86 | "x1, var1, var", 87 | "x1, @, x2" ] 88 | 89 | -- tparseVarDecl = do 90 | -- testParser parseVarDecl True [ 91 | -- "var x,y:char;", 92 | -- "var x,y:char; ", 93 | -- "var x,y:char1; ", 94 | -- "var x,y:char;abc", 95 | -- "var x , y : char ; abc", 96 | -- "var x,y:char; var1 , var2, var3: char ; abc", 97 | -- "var x,y:char; var" ] 98 | -- testParser parseVarDecl False [ 99 | -- "var", 100 | -- "x,y:char;", 101 | -- "var x,y:char@;", 102 | -- "var x,var:char;", 103 | -- "var :char; x2:char;" ] 104 | 105 | -- tparseTypeDecl = do 106 | -- testParser parseTypeDecl True [ 107 | -- "type x,y=char;", 108 | -- "type x,y=char1;", 109 | -- "type x,y=char; ", 110 | -- "type x,y=char;abc", 111 | -- "type x , y = char ; abc", 112 | -- "type x,y=char; var1 , var2, var3= char ; abc", 113 | -- "type x,y=char; type", 114 | -- "type x,y=char; z=boolean; type w=char;abc " ] 115 | -- testParser parseTypeDecl False [ 116 | -- "type", 117 | -- "x,y=char;", 118 | -- "x,y=char;", 119 | -- "type x,y=char@;", 120 | -- "type x,type=char;", 121 | -- "type =char; x2=char;" ] 122 | 123 | tparseBlock = do 124 | putStrLn "Testing parseBlock" 125 | testParser (parseBlock<*eof) True [ 126 | "var x,y:char; z:boolean; var w:char; begin x:=1 end ", 127 | "var x,y:char; z:boolean; var w:char; begin x:=1; y:=2 end ", 128 | "var x,y:char; z:boolean; var w:char; type x,y=char; var1 , var2, var3= char ; begin x:=1 end ", 129 | "var x , y : char ; z:boolean; var w:char; type x , y = char ; var1 , var2, var3= char ; begin x:=1 end", 130 | "type x,y=char; z=boolean; type w=char;begin x:=1 end ", 131 | "type x,y=char; z=boolean; type w=char;" ++ " type x,y=char; z=boolean; type w=char;" 132 | ++ "var x2,y2:char; z2:boolean; var w2:char; begin x:=1 end ", 133 | "begin x:=1 end", 134 | "begin for x:=1 to 2 do begin x:=3; x:=4 end end", 135 | "begin x:=1; end", 136 | "begin end", 137 | "begin ; ; end" ] 138 | testParser (parseBlock<*eof) False [ 139 | "", 140 | "@abc ", 141 | "type", 142 | "var", 143 | "var x=char;", 144 | "var x:char; type y:char;", 145 | "var x:char; type var=char;", 146 | "var x,y:char; z:boolean; var w:char; begin x:=1; y:=2 ", 147 | "var x,y:char; z:boolean; var w:char; begin x:=1; y:=2 endabcd", 148 | "var x,y:char; z:boolean; var w:char; begin x:=1; y:=2 end abcd", 149 | "var x,y:char; z:boolean; var w:char; type x,y=char; var1 , var2, var3= char ; begin 5 end ", 150 | "begin x:=1 end.", 151 | " begin x:=1 end", 152 | "begin 123 end", 153 | "begin x:=1 x:=2 end" ] 154 | 155 | tparseProgram = do 156 | putStrLn "Testing parseProgram" 157 | testParser parseProgram True [ 158 | "program mypro; var x:char;begin end.", 159 | "program mypro; var x:char1;begin end.", 160 | "program mypro ; var x : char;begin end. ", 161 | "program mypro;begin end.", 162 | "program mypro; var x:char; type t=boolean;begin end ." ] 163 | testParser parseProgram False [ 164 | "program mypro; var x:char;begin end", 165 | "program mypro; var x:char@;begin end.", 166 | " mypro; var x:char;begin end.", 167 | " program mypro var x : char;.begin end ", 168 | "program var;begin end.", 169 | "program p;", 170 | "program p;var x:char;", 171 | "program mypro; var x:char; var begin end." ] 172 | 173 | tparseOP = do 174 | testParser parseOP True $ (map fst operators) ++ [ 175 | "+123", 176 | "== ", -- should work! only consumes "=" 177 | "divxyz ", -- works too! 178 | "<> " ] 179 | testParser parseOP False ["", "@", ":", " +"] 180 | 181 | tparseDesigProp = do 182 | testParser parseDesigProp True [ 183 | ".x", 184 | ". x abcd ", 185 | ".var123" ] 186 | 187 | tparseDesignator = undefined 188 | 189 | tparseTerm = do 190 | testParser parseTerm True [ 191 | "nil ", 192 | "true", 193 | "x", 194 | "true and x", 195 | "'hello'", 196 | "'hello' * '123'", 197 | "true and false ", 198 | "true mod 'hello'", 199 | -- follwing cases will succeed by only consuming some of the input 200 | "true abcd", 201 | "true + false", 202 | "'hello' '123'", 203 | "true false", 204 | "true *"] 205 | testParser parseTerm False [ 206 | "", 207 | "* true"] 208 | 209 | -- tparseString = do 210 | -- testParser parseString True [ 211 | -- ] 212 | 213 | -- mapM (\s -> putStrLn $ s ++ " :\n" ++ show (p' parseString s)) [ 214 | -- "''", 215 | -- "' ' ", 216 | -- "'abc' ", 217 | -- "' ab ' ", 218 | -- "' \\\\abc' ", 219 | -- "' \ \nabc' ", 220 | -- "' \\xyz' ", 221 | -- "' \\\\ ' ", 222 | -- "'\\ab\\\\c' " ] 223 | 224 | -- -- testParser parseString False [ 225 | -- -- "' \\abc' ", 226 | -- -- ] 227 | 228 | tparseSimpleExpr = undefined 229 | 230 | tparseExpr = do 231 | testParser (parseExpr<*eof) True [ 232 | "1 ", 233 | "x", 234 | "(1)", 235 | "1+2", 236 | "1+2+3+4", 237 | "x+y+z", 238 | "1*2", 239 | "1*2*3*4", 240 | "-1+2", 241 | "-1+2+3", 242 | "1+2*3+4", 243 | "1*2+3*4", 244 | "1+2*3*4", 245 | "1*2*3+4", 246 | "1*2*x+4", 247 | "x*2*3+4", 248 | "(1+2)", 249 | "(1+x)", 250 | "(-1+2)", 251 | "(-1+2+3)", 252 | "(1+2*3+4)", 253 | "(1*2+3*4)", 254 | "(1+2*3*4)", 255 | "(1*2*3+4)", 256 | "(1*2*x+4)", 257 | "(x*2*3+4)", 258 | "-(1+2)", 259 | "-(1+2)+3", 260 | "-1", 261 | "-1*2", 262 | "1*(-2)", 263 | "1*(-2+3)", 264 | "1*(-2*3)", 265 | "1+(2+3)*4"] 266 | testParser (parseExpr<*eof) False [ 267 | "1*-2"] 268 | 269 | 270 | tparseStmntList = undefined 271 | 272 | tparseIf = undefined 273 | 274 | tparseStatement = undefined 275 | 276 | tparseNumber = do 277 | testParser parseNumber True [ 278 | "123", 279 | "0123", 280 | "12.3", 281 | "123 ", 282 | "0123abc", 283 | "12.3 ", 284 | "12.3abc"] 285 | testParser parseNumber False [ 286 | "123.", 287 | ".123", 288 | "a123"] 289 | 290 | tparseFor = do 291 | testParser parseFor True [ 292 | "for x:= 2 downto 10 do y := 3 ", 293 | "for x:= 2 downto 10 do y :=3", 294 | "for x:= w downto 10.5 do y :=3", 295 | "for x123:= 3+3 to 5-5 do begin y:=3;z:=10+5end"] 296 | testParser parseFor False [ 297 | "", 298 | "x:= 2 downto 10 do y := 3 ", 299 | "for x : 2 downto 10 do y :=3", 300 | "for123 x:= 3+3 to 5-5 do begin y:=3;z:=10+5end", 301 | "for x:= 3+3 down 5-5 do y:=true"] 302 | 303 | 304 | tgettype = runTestTT $ TestList [ 305 | let s = "x := 1" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Right ([], [[("x", TYint)]], []), 306 | let s = "x := 1+1" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Right ([], [[("x", TYint)]], []), 307 | let s = "x := 1+1*2" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Right ([], [[("x", TYint)]], []), 308 | let s = "x := 1.2" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYreal)]], [])) $ Right ([], [[("x", TYreal)]], []), 309 | let s = "x := 1.2 + 1.2" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYreal)]], [])) $ Right ([], [[("x", TYreal)]], []), 310 | let s = "x := 1 + 1.2" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYreal)]], [])) $ Right ([], [[("x", TYreal)]], []), 311 | let s = "x := 1.2 + 1" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYreal)]], [])) $ Right ([], [[("x", TYreal)]], []), 312 | let s = "x := True or False" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYbool)]], [])) $ Right ([], [[("x", TYbool)]], []), 313 | let s = "x := +1" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Right ([], [[("x", TYint)]], []), 314 | let s = "x := + 1.2" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYreal)]], [])) $ Right ([], [[("x", TYreal)]], []), 315 | let s = "x := (1 < 2)" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYbool)]], [])) $ Right ([], [[("x", TYbool)]], []), 316 | let s = "x := (1 < 2.3)" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYbool)]], [])) $ Right ([], [[("x", TYbool)]], []), 317 | let s = "x := (true > false)" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYbool)]], [])) $ Right ([], [[("x", TYbool)]], []), 318 | let s = "x := ('a' = 'b')" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYbool)]], [])) $ Right ([], [[("x", TYbool)]], []), 319 | let s = "x := true or (1 < 2)" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYbool)]], [])) $ Right ([], [[("x", TYbool)]], []), 320 | let s = "x := True or y" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYbool), ("y", TYbool)]], [])) $ Right ([], [[("x", TYbool), ("y", TYbool)]], []), 321 | let s = "x := (1 < y) or z" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYbool), ("z", TYbool), ("y", TYreal)]], [])) $ Right ([], [[("x", TYbool), ("z", TYbool), ("y", TYreal)]], []) ] 322 | -- Fail: 323 | -- foo "x := True or y" [("x", TYbool), ("y", TYint)] [("x", TYbool), ("y", TYint)] 324 | 325 | ttypechkProgram = let chk pr = case p' parseProgram pr of 326 | Right p -> typechkProgram p 327 | Left err -> error $ "Parse error:\n\t" ++ pr ++ "\nin:" ++ show err 328 | in runTestTT $ TestList [ 329 | let s = "program p; var x:boolean; begin end." in TestCase $ (flip (assertEqual s)) (chk s) $ Right (), 330 | let s = "program p; var x:integer; begin x:=1 end." in TestCase $ (flip (assertEqual s)) (chk s) $ Right (), 331 | let s = "program p; var x:integer; function f(x:boolean):boolean; begin x:=true end; begin x:=1 end." in TestCase $ (flip (assertEqual s)) (chk s) $ Right (), 332 | let s = "program p; var x:integer; function f(x:boolean):boolean; begin x:=true end; function g(y:integer):boolean; begin y:=0 end; begin if f(g(x)) then x:=1 end." in TestCase $ (flip (assertEqual s)) (chk s) $ Right (), 333 | let s = "program p; var x:integer; function f(x:boolean):boolean; begin x:=true end; function g(y:integer):boolean; begin y:=0 end; begin if g(f(x)) then x:=1 end." in TestCase $ (flip (assertEqual s)) (chk s) $ Left (ArgTypeMismatch TYbool TYint)] 334 | 335 | ttypechkIf = runTestTT $ TestList [ 336 | let s = "if true then x:=1" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Right ([], [[("x", TYint)]], []), 337 | let s = "if true then x:=1 else x:= 2" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Right ([], [[("x", TYint)]], []), 338 | let s = "if (5*5) then x:=1" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Left $ TypeMismatch TYbool TYint, 339 | let s = "if true then x:=1 else x:= false" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Left $ TypeMismatch TYint TYbool, 340 | let s = "if true then x:= false else x:=2" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Left $ TypeMismatch TYint TYbool ] 341 | 342 | ttypechkFor = runTestTT $ TestList [ -- todo add TYchar test cases 343 | let s = "for x:=1 to 10 do x:=1" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Right ([], [[("x", TYint)]], []), 344 | let s = "for x:=5*x to 1 do y:=true" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint), ("y", TYbool)]], [])) $ Right ([], [[("x", TYint), ("y", TYbool)]], []), 345 | let s = "for x:=1 to true do x:=2" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint)]], [])) $ Left $ TypeMismatch TYint TYbool, 346 | let s = "for x:=5*x to 1 do x:=true" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYbool)]], [])) $ Left $ TypeMismatchOrd TYbool, 347 | let s = "for x:=1.5 to 10.5 do y:=true" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYreal), ("y", TYbool)]], [])) $ Left $ TypeMismatchOrd TYreal, 348 | let s = "for x:=1.5 to 10.5 do y:=true" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint), ("y", TYbool)]], [])) $ Left $ TypeMismatch TYint TYreal, 349 | let s = "for x:=1 to 1.5 do y:=true" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint), ("y", TYbool)]], [])) $ Left $ TypeMismatch TYint TYreal, 350 | let s = "for x:=1*z to 10 do y:=true" in TestCase $ (flip (assertEqual s)) (typechkStr s ([], [[("x", TYint), ("y", TYbool)]], [])) $ Left $ NotInScope ("z") ] 351 | 352 | tparseDeclProc = do 353 | testParser (parseDeclProc <* eof) True [ 354 | "procedure fo; begin end; ", 355 | "procedure fo; var x,y: integer; begin end;", 356 | "procedure fo(); type x=boolean; begin end;", 357 | "procedure fo; begin x:= 1 end;", 358 | "procedure fo; begin x:= 1 end;", 359 | "procedure fo(); begin x:= 1 end;", 360 | "procedure fo(p1:char); begin end;", 361 | "procedure fo(p1:char); var x,y: integer; begin end;", 362 | "procedure fo(p1:char); type x=boolean; begin end;", 363 | "procedure fo(p1:char); begin x:= 1 end;", 364 | "Procedure fo(p1:char); begin x:= 1 end;", 365 | "procedure fo(p1:char); begin x:= 1 end;", 366 | "procedure fo(p1,p2:char; p3:real); begin x:= 1 end ; " ] 367 | testParser (parseDeclProc <* eof) False [ 368 | "", 369 | "procedure fo; begin end; abc", 370 | "procedure fo;", 371 | "procedure fo begin end; ", 372 | "procedure fo; begin end ", 373 | "procedure var; begin end;", 374 | "procedure fo(;); begin end;", 375 | "procedure fo(p1:char;); begin end;" ] 376 | 377 | tparseDeclFunc = do 378 | testParser (parseDeclFunc <* eof) True [ 379 | "function fo:integer; begin end;", 380 | "function fo:mytype; var x,y: integer; begin end;", 381 | "function fo():real; type x=boolean; begin end;", 382 | "function fo : real; begin x:= 1 end;", 383 | "Function fo : real; begin x:= 1 end;", 384 | "function fo() : real; begin x:= 1 end;", 385 | "function fo(p1:char): real; begin end;", 386 | "function fo(p1:char): real ; var x,y: integer; begin end;", 387 | "function fo(p1:char) : real; type x=boolean; begin end;", 388 | "function fo(p1:char):real ; begin x:= 1 end;", 389 | "function fo(p1:char): real; begin x:= 1 end;", 390 | "function fo(p1:char): real; begin x:= 1 end;", 391 | "function fo(p1,p2:char; p3:real): real; begin x:= 1 end; " ] 392 | testParser (parseDeclFunc <* eof) False [ 393 | "", 394 | "function fo:integer; begin end; abc", 395 | "function fo;", 396 | "function fo:integer begin end;", 397 | "function fo:integer; begin end ", 398 | "function fo; begin end ", 399 | "function var; begin end;", 400 | "function fo(;); begin end;", 401 | "function fo(p1:char;); begin end;", 402 | "function fo: real;", 403 | "function fo: real; begin end ", 404 | "function var: real; begin end;", 405 | "function fo(;): real; begin end;", 406 | "function fo(p1:char;): real; begin end;" ] 407 | 408 | 409 | tEmitProgram p = E.codegen (emptyModule "MainModule") p >>= putStrLn.snd 410 | 411 | sampleProgs = [ 412 | "program p;type t1=integer; t2=t1;var x:integer; x1:t1; x2:t2;" 413 | ++ "begin x1:=1; x2:=2; x:=x1; x:=x2; x1:=x; x1:=x2; x2:=x; x2:=x1;end.", 414 | "program p; type tt = integer;function f():tt;var x : integer;begin f:=x;end;begin writeln(f()) end.", 415 | "program p; type tt = integer;function f():integer;var x : tt;begin f:=x;end;begin writeln(f()) end.", 416 | "program p;procedure foo (var x:integer; y:real); begin x := 1; y := 2.5 end;" 417 | ++ "function bar(z:integer) : integer; begin foo(z, 3.14); bar := z end;" 418 | ++ "begin writeln(bar(99)) end.", 419 | "program p;function foo (var x:integer; y:real):integer; begin x := 1; y := 2.5 end;" 420 | ++ "function bar(z:integer) : integer; begin if foo(z, 3.14) > 2 then; bar := z end;" 421 | ++ "begin writeln(bar(99)) end.", 422 | "program fib; function fib(n:integer):integer; begin if n < 2 then fib := 1 else fib := fib(n-1)+fib(n-2) end; begin writeln('fib 5 = ', fib(5)) end.", 423 | "program fib; function fib(n:integer):integer; begin if n < 2 then fib := 1 else fib := fib(n-1)+fib(n-2) end; procedure p(n:integer); begin writeln('fib = ', fib(n)) end; begin p(1); p(5); p(10) end.", 424 | "program fib; var i : integer;function fib(n : integer) : integer; begin if n < 2 then fib := 1 else fib := fib(n-1) + fib(n-2) end; " 425 | ++ "procedure p(n:integer); begin writeln('fib = ', fib(n)) end; begin for i:= 0 to 10 do p(i) end.", 426 | "program p; begin write('hello world') end.", 427 | "program p; var x:integer; begin write('hello world') end.", 428 | "program p; var s:string; begin write('hello world') end.", 429 | "program p; function f():integer; var s:string; begin s:= 'ok' end; begin end.", 430 | "program p; function f():integer; begin f:=2 end; begin end.", 431 | "program p; var x:integer; function f():integer; begin f:=2 end; begin x:=f() end.", 432 | "program p; function f1 (a:integer; b:boolean) : integer; begin end; begin end.", 433 | "program p; procedure f1 (a:integer); begin end; begin end.", 434 | "program p; procedure f1 (); begin end; begin f1() end.", 435 | "program p; function f1 (a:integer; b:boolean) : integer; begin a := 1 + 1 end; begin end.", 436 | "program p; function f1 (a:integer; b:boolean) : integer; begin a := -1 end; begin end.", 437 | "program p; function f1 (a:integer; b:boolean) : integer; begin a := +1 end; begin end.", 438 | "program p; function f1 (a:real; b:boolean) : integer; begin a := 1.5 + 1 end; begin end.", 439 | "program p; function f1 (a:integer; b:boolean) : integer; begin a := 1 + 1 * 2 + 3 end; begin end.", 440 | "program p; function f2 (a:integer; b:boolean) : integer; var x:integer; begin x:=1 end; begin end.", 441 | "program p; function f():integer; var x:integer; begin x:=1; if x>1 then x:=1 else x:=2 end; begin end.", 442 | "program p; function f():integer; var x,y:integer; begin for x:=0 to 10 do y:=1 end; begin end.", 443 | "program p; function f():integer; var x,y:integer; begin for x:=10 downto 0 do y:=1 end; begin end.", 444 | "program p; function f():integer; var x:integer; begin x:=1; while x>1 do x:=2 end; begin end.", 445 | "program p; var x: char; begin x:='a'; x:='b'; if true then writeln(x); if true then writeln(x); end.", 446 | "program p; var z:integer; procedure f(); var x:integer; begin x:=1; end; begin end.", 447 | "program p; var x:integer; function f(x:integer):integer; begin x:=1 end; begin x:=2 end.", 448 | "program p; var x:real; begin x:= 1 + 1/2 + 1.0/2 + 1*2.0; writeln(x) end.", 449 | "program p; var x:boolean; begin writeln(1 mod 5, -1 mod 5) end.", 450 | "program p; var x:boolean; begin writeln(1 div 5, -1 div 5) end.", 451 | "program p; var x:boolean; begin x:= (1>2) or (3.14 > 2); writeln(x) end.", 452 | "program p; var x:boolean; begin x:= (1>2) and (3.14 > 2); writeln(x) end."] 453 | 454 | tCodegenSampleProg n = 455 | putStrLn "--------------------------------" >> 456 | (print $ sampleProgs !! n) >> 457 | putStrLn "\n------------------" >> 458 | case (Conv.chkConvProgram' <$> sampleProgs) !! n 459 | of Right x -> print x 460 | >> tEmitProgram x 461 | Left x -> error $ x 462 | 463 | tCodegenSampleProgAll = forM_ [0..length sampleProgs -1] tCodegenSampleProg 464 | >> putStrLn "OK. All sample progs compiled" 465 | 466 | testAll = do 467 | tparseKeywords 468 | tparseIdent 469 | tparseIdentList 470 | tparseBlock 471 | tparseProgram 472 | tparseOP 473 | tparseTerm 474 | tparseNumber 475 | tparseFor 476 | tparseExpr 477 | ttypechkIf 478 | ttypechkFor 479 | ttypechkProgram 480 | tgettype 481 | -- tparseVarDecl 482 | -- tparseTypeDecl 483 | -- tparseDesigProp 484 | -- tparseDesignator 485 | -- tparseString 486 | 487 | testFiles = do 488 | let path = "f:/Paskell/pascal-src/" 489 | parseFromFile parseProgram $ path ++ "p1.pas" 490 | 491 | 492 | compileFile path = do 493 | e <- parseFromFile parseProgram path 494 | case e of Left errP -> print errP 495 | Right ast -> case typechkProgram ast of 496 | Left errTC -> print errTC 497 | Right _ -> E.printllvm ast >>= putStrLn 498 | -- foo path = do 499 | -- x <- compileFile path 500 | -- case x 501 | 502 | main = testAll 503 | --------------------------------------------------------------------------------