├── .ghci ├── .gitignore ├── Base.hs ├── Closure.hs ├── CodeGen.hs ├── Containers.hs ├── Core.hs ├── DataCon.hs ├── Dependency.hs ├── Eval.hs ├── Inliner.hs ├── LICENSE ├── Nano.hs ├── Parser.hs ├── PrimGHC.hs ├── PrimOps.hs ├── README.md ├── ScopeCheck.hs ├── Syntax.hs ├── Types.hs ├── _main_old.hs ├── asm_shit ├── rts.asm └── sys_macos.asm ├── bootstrap.sh ├── examples ├── church.nano ├── test.nano └── test5.nano ├── main.hs ├── nanohs.cabal ├── rts.c ├── slides ├── .gitignore ├── nanohs_slides.pdf └── nanohs_slides.tex ├── sloc_count.sh └── topsort └── TopSortSCC.hs /.ghci: -------------------------------------------------------------------------------- 1 | :set -XMagicHash 2 | :set -XStrict 3 | :set -XFlexibleInstances 4 | :set -XFlexibleContexts 5 | :set -XTypeSynonymInstances 6 | :set -XOverloadedLists 7 | :set -XOverloadedStrings -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | bck/ 3 | *.o 4 | *.hi 5 | *.ll 6 | a.out 7 | tmp.c 8 | tmp1.c 9 | tmp2.c 10 | tmp.asm 11 | *.exe 12 | nanohs_stage1.c 13 | nanohs_stage2.c 14 | nanohs_stage3.c 15 | a.txt 16 | test1.nano 17 | test2.nano 18 | test3.nano -------------------------------------------------------------------------------- /Base.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Base library 3 | 4 | {-# LANGUAGE CPP, NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module Base where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | -- debugging hacks 18 | 19 | #ifndef __GLASGOW_HASKELL__ 20 | debug_ str x = debug str x x 21 | debug str x y = case action x ActionToken of { Unit -> y } 22 | where { action x = ioseq (putStr ">>> ") (ioseq (putStr str) (print x)) } 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- 26 | -- * Prelude 27 | -- ** functions 28 | 29 | -- data Unit = Unit deriving Show 30 | 31 | id :: a -> a 32 | id = \x -> x 33 | 34 | const :: a -> b -> a 35 | const x = \_ -> x 36 | 37 | app :: (a -> b) -> a -> b 38 | app f x = f x 39 | 40 | compose :: (b -> c) -> (a -> b) -> (a -> c) 41 | compose f g = \x -> f (g x) 42 | 43 | compose3 :: (c -> d) -> (b -> c) -> (a -> b) -> (a -> d) 44 | compose3 f g h = \x -> f (g (h x)) 45 | 46 | flip :: (a -> b -> c) -> (b -> a -> c) 47 | flip f = \x y -> f y x 48 | 49 | fix :: ((a -> b) -> (a -> b)) -> a -> b 50 | fix u x = u (fix u) x 51 | 52 | -- test_fix :: Int -> List Int 53 | -- test_fix x = fix (\rec n -> ifte (eq n 0) Nil (Cons n (rec (dec n)))) x 54 | 55 | -------------------------------------------------------------------------------- 56 | -- ** numbers 57 | 58 | inc :: Int -> Int 59 | inc n = plus n 1 60 | 61 | inc2 :: Int -> Int 62 | inc2 n = plus n 2 63 | 64 | dec :: Int -> Int 65 | dec n = minus n 1 66 | 67 | natRec :: a -> (a -> a) -> Int -> a 68 | natRec z s = go where { go n = ifte (eq n 0) z (s (go (dec n))) } 69 | 70 | ceq :: Char -> Char -> Bool 71 | ceq c d = eq (ord c) (ord d) 72 | 73 | cneq :: Char -> Char -> Bool 74 | cneq c d = not (ceq c d) 75 | 76 | gneq :: Eq a => a -> a -> Bool 77 | gneq x y = not (geq x y) 78 | 79 | neq :: Int -> Int -> Bool 80 | neq x y = not (eq x y) 81 | 82 | gt :: Int -> Int -> Bool 83 | gt x y = lt y x 84 | 85 | ge :: Int -> Int -> Bool 86 | ge x y = le y x 87 | 88 | min :: Int -> Int -> Int 89 | min x y = ifte (le x y) x y 90 | 91 | max :: Int -> Int -> Int 92 | max x y = ifte (le x y) y x 93 | 94 | data Ordering = LT | EQ | GT deriving Show 95 | 96 | compare :: Int -> Int -> Ordering 97 | compare x y = ifte (lt x y) LT (ifte (eq x y) EQ GT) 98 | 99 | -- | the list [0,1,...n-1] 100 | range :: Int -> List Int 101 | range n = rangeFrom 0 n 102 | 103 | -- | the list [k,k+1,...k+n-1] 104 | rangeFrom :: Int -> Int -> List Int 105 | rangeFrom k n = ifte (gt n 0) (Cons k (rangeFrom (inc k) (dec n))) Nil 106 | 107 | -- -- | the list [-n+1,-n+2,...0] == reverse (map negate (range n)) 108 | -- negativeDeBruijnRange :: Int -> List Int 109 | -- negativeDeBruijnRange n = rangeFrom (inc (negate n)) n 110 | 111 | -------------------------------------------------------------------------------- 112 | -- ** Bool 113 | 114 | -- data Bool = True | False deriving Show 115 | 116 | -- These are primops for now so they can short-circuit 117 | -- 118 | -- and :: Bool -> Bool -> Bool 119 | -- and x ~y = case x of { True -> y ; False -> False } 120 | -- 121 | -- or :: Bool -> Bool -> Bool 122 | -- or x ~y = case x of { True -> True ; False -> y } 123 | 124 | -- This is a primop only for efficiency reasons 125 | -- 126 | -- not :: Bool -> Bool 127 | -- not b = case b of { True -> False ; False -> True } 128 | 129 | and3 :: Bool -> Bool -> Bool -> Bool 130 | and3 x y z = and (and x y) z 131 | 132 | andList :: List Bool -> Bool 133 | andList list = case list of { Nil -> True ; Cons b bs -> ifte b (andList bs) False } 134 | 135 | -------------------------------------------------------------------------------- 136 | -- ** Maybe 137 | 138 | -- data Maybe a = Nothing | Just a deriving Show 139 | 140 | isJust :: Maybe a -> Bool 141 | isJust mb = case mb of { Nothing -> False ; Just _ -> True } 142 | 143 | isNothing :: Maybe a -> Bool 144 | isNothing mb = case mb of { Nothing -> True ; Just _ -> False } 145 | 146 | fromJust :: Maybe a -> a 147 | fromJust mb = case mb of { Just x -> x ; Nothing -> error "fromJust" } 148 | 149 | mbfmap :: (a -> b) -> Maybe a -> Maybe b 150 | mbfmap f mb = case mb of { Just x -> Just (f x) ; Nothing -> Nothing } 151 | 152 | catMaybes :: List (Maybe a) -> List a 153 | catMaybes mbs = go mbs where { go list = case list of { Nil -> Nil ; Cons mb rest -> 154 | case mb of { Nothing -> go rest ; Just x -> Cons x (go rest) } } } 155 | 156 | -------------------------------------------------------------------------------- 157 | -- ** Either 158 | 159 | data Either a b = Left a | Right b deriving Show 160 | 161 | isLeft :: Either a b -> Bool 162 | isLeft ei = case ei of { Left _ -> True ; Right _ -> False } 163 | 164 | isRight :: Either a b -> Bool 165 | isRight ei = case ei of { Left _ -> False ; Right _ -> True } 166 | 167 | fromLeft :: Either a b -> a 168 | fromLeft ei = case ei of { Left x -> x ; Right _ -> error "fromLeft" } 169 | 170 | fromRight :: Either a b -> b 171 | fromRight ei = case ei of { Right y -> y ; Left _ -> error "fromRight" } 172 | 173 | -------------------------------------------------------------------------------- 174 | -- ** Pair, Triple 175 | 176 | -- data Pair a b = Pair a b deriving Show 177 | 178 | fst :: Pair a b -> a 179 | fst pair = case pair of { Pair x _ -> x } 180 | 181 | snd :: Pair a b -> b 182 | snd pair = case pair of { Pair _ y -> y } 183 | 184 | swap :: Pair a b -> Pair b a 185 | swap pair = case pair of { Pair x y -> Pair y x } 186 | 187 | first :: (a1 -> a2) -> Pair a1 b -> Pair a2 b 188 | first f pair = case pair of { Pair x y -> Pair (f x) y } 189 | 190 | second :: (b1 -> b2) -> Pair a b1 -> Pair a b2 191 | second g pair = case pair of { Pair x y -> Pair x (g y) } 192 | 193 | data Triple a b c = Triple a b c deriving Show 194 | 195 | fst3 :: Triple a b c -> a 196 | fst3 triple = case triple of { Triple x _ _ -> x } 197 | 198 | snd3 :: Triple a b c -> b 199 | snd3 triple = case triple of { Triple _ y _ -> y } 200 | 201 | thd3 :: Triple a b c -> c 202 | thd3 triple = case triple of { Triple _ _ z -> z } 203 | 204 | -------------------------------------------------------------------------------- 205 | -- ** Lists 206 | 207 | -- data List a = Nil | Cons a (List a) deriving (Eq) 208 | 209 | singleton :: a -> List a 210 | singleton x = Cons x Nil 211 | 212 | head :: List a -> a 213 | head ls = case ls of { Cons x _ -> x ; Nil -> error "head: empty list" } 214 | 215 | tail :: List a -> List a 216 | tail ls = case ls of { Nil -> Nil ; Cons _ xs -> xs } 217 | 218 | isNil :: List a -> Bool 219 | isNil ls = case ls of { Nil -> True ; Cons _ _ -> False } 220 | 221 | mbSingleton :: List a -> Maybe a 222 | mbSingleton list = case list of { Cons x xs -> case xs of { Nil -> Just x ; _ -> Nothing } ; Nil -> Nothing } 223 | 224 | mbPair :: List a -> Maybe (Pair a a) 225 | mbPair list = case list of { Cons x xs -> case xs of { Cons y ys -> case ys of 226 | { Nil -> Just (Pair x y) ; _ -> Nothing } ; Nil -> Nothing } ; Nil -> Nothing } 227 | 228 | length :: List a -> Int 229 | length ls = case ls of { Nil -> 0 ; Cons _ xs -> inc (length xs) } 230 | 231 | index :: Int -> List a -> a 232 | index k ls = case ls of 233 | { Nil -> error "index: out of bounds" 234 | ; Cons x xs -> ifte (eq k 0) x (index (dec k) xs) } 235 | 236 | elem :: Eq a => a -> List a -> Bool 237 | elem x = go where { go ls = case ls of { Nil -> False ; Cons y ys -> ifte (geq x y) True (go ys) } } 238 | 239 | intElem :: Int -> List Int -> Bool 240 | intElem x = go where { go ls = case ls of { Nil -> False ; Cons y ys -> ifte (eq x y) True (go ys) } } 241 | 242 | charElem :: Char -> List Char -> Bool 243 | charElem x = go where { go ls = case ls of { Nil -> False ; Cons y ys -> ifte (ceq x y) True (go ys) } } 244 | 245 | stringElem :: String -> List String -> Bool 246 | stringElem x = go where { go ls = case ls of { Nil -> False ; Cons y ys -> ifte (stringEq x y) True (go ys) } } 247 | 248 | findIndex :: (a -> Bool) -> List a -> Maybe Int 249 | findIndex cond list = go 0 list where { go j ls = case ls of { Nil -> Nothing 250 | ; Cons x xs -> case cond x of { True -> Just j ; _ -> go (inc j) xs }}} 251 | 252 | unsafeFindIndex :: (a -> Bool) -> List a -> Int 253 | unsafeFindIndex cond list = case findIndex cond list of { Just k -> k ; Nothing -> error "unsafeFindIndex: not found" } 254 | 255 | foldl :: (a -> b -> a) -> (a -> List b -> a) 256 | foldl f x0 list = go x0 list where 257 | { go x ls = case ls of { Nil -> x ; Cons y ys -> go (f x y) ys } 258 | } 259 | 260 | flipFoldr :: (b -> a -> a) -> (List b -> a -> a) 261 | flipFoldr f list y0 = go list y0 where 262 | { go ls y = case ls of { Nil -> y ; Cons x xs -> f x (go xs y) } 263 | } 264 | 265 | foldr :: (b -> a -> a) -> (a -> List b -> a) 266 | foldr f x list = flipFoldr f list x 267 | 268 | scanl :: (a -> b -> a) -> (a -> List b -> List a) 269 | scanl f x0 list = go x0 list where 270 | { go x ls = case ls of { Nil -> Cons x Nil ; Cons y ys -> Cons x (go (f x y) ys) } 271 | } 272 | 273 | -- | @scanl_ f a xs = init (scanl f a xs)@ 274 | scanl_ :: (a -> b -> a) -> (a -> List b -> List a) 275 | scanl_ f x0 list = go x0 list where 276 | { go x ls = case ls of { Nil -> Nil ; Cons y ys -> Cons x (go (f x y) ys) } 277 | } 278 | 279 | sum :: List Int -> Int 280 | sum ns = foldl plus 0 ns 281 | 282 | reverse :: List a -> List a 283 | reverse list = foldl (\xs x -> Cons x xs) Nil list 284 | 285 | snoc :: List a -> a -> List a 286 | snoc xs y = case xs of { Nil -> singleton y ; Cons z zs -> Cons z (snoc zs y) } 287 | 288 | append :: List a -> List a -> List a 289 | append xs ys = case xs of { Nil -> ys ; Cons z zs -> Cons z (append zs ys) } 290 | 291 | append3 :: List a -> List a -> List a -> List a 292 | append3 xs ys zs = append xs (append ys zs) 293 | 294 | concat :: List (List a) -> List a 295 | concat lls = flipFoldr append lls Nil 296 | 297 | -- | > reverseAppend xs ys = append (reverse xs) ys 298 | reverseAppend :: List a -> List a -> List a 299 | reverseAppend xs ys = case xs of { Nil -> ys ; Cons z zs -> reverseAppend zs (Cons z ys) } 300 | 301 | map :: (a -> b) -> List a -> List b 302 | map f = go where { go xs = case xs of { Nil -> Nil ; Cons x xs -> Cons (f x) (map f xs) } } 303 | 304 | for :: List a -> (a -> b) -> List b 305 | for xs f = map f xs 306 | 307 | filter :: (a -> Bool) -> List a -> List a 308 | filter f = go where 309 | { go list = case list of 310 | { Nil -> Nil ; Cons x xs -> case f x of 311 | { False -> go xs ; True -> Cons x (go xs) } } } 312 | 313 | replicate :: Int -> a -> List a 314 | replicate n x = go n where { go m = ifte (eq m 0) Nil (Cons x (go (dec m))) } 315 | 316 | take :: Int -> List a -> List a 317 | take n ls = case ls of { Nil -> Nil ; Cons x xs -> ifte (eq n 0) Nil (Cons x (take (dec n) xs)) } 318 | 319 | drop :: Int -> List a -> List a 320 | drop n ls = case ls of { Nil -> Nil ; Cons x xs -> ifte (eq n 0) ls (drop (dec n) xs) } 321 | 322 | takeWhile :: (a -> Bool) -> List a -> List a 323 | takeWhile cond list = go list where 324 | { go ls = case ls of { Nil -> Nil ; Cons x xs -> case cond x of 325 | { True -> Cons x (go xs) ; False -> Nil } } } 326 | 327 | dropWhile :: (a -> Bool) -> List a -> List a 328 | dropWhile cond list = go list where 329 | { go ls = case ls of { Nil -> Nil ; Cons x xs -> case cond x of 330 | { True -> go xs ; False -> ls } } } 331 | 332 | span :: (a -> Bool) -> List a -> Pair (List a) (List a) 333 | span cond xs = Pair (takeWhile cond xs) (dropWhile cond xs) 334 | 335 | zipWith :: (a -> b -> c) -> List a -> List b -> List c 336 | zipWith f as bs = go as bs where { go ls1 ls2 = case ls1 of { Nil -> Nil ; Cons x xs -> case ls2 of 337 | { Nil -> Nil ; Cons y ys -> Cons (f x y) (go xs ys) } } } 338 | 339 | zip :: List a -> List b -> List (Pair a b) 340 | zip xs ys = zipWith Pair xs ys 341 | 342 | unzip :: List (Pair a b) -> Pair (List a) (List b) 343 | unzip xys = case xys of { Nil -> Pair Nil Nil ; Cons this rest -> case this of 344 | { Pair x y -> case unzip rest of { Pair xs ys -> Pair (Cons x xs) (Cons y ys) } } } 345 | 346 | -- | Zip with @[0..n-1]@ 347 | zipIndex :: List a -> List (Pair Int a) 348 | zipIndex xs = zip (range (length xs)) xs 349 | 350 | -- | Zip with @[0..n-1]@ 351 | zipWithIndex :: (a -> Int -> b) -> List a -> List b 352 | zipWithIndex f xs = zipWith f xs (range (length xs)) 353 | 354 | -- | Zip with @replicate n y@ 355 | zipConst :: b -> List a -> List (Pair b a) 356 | zipConst y list = worker list where { worker l = case l of { Nil -> Nil ; Cons x xs -> Cons (Pair y x) (worker xs) }} 357 | 358 | -- | Zip with @(first : repeat rest)@ 359 | zipFirstRest :: b -> b -> List a -> List (Pair b a) 360 | zipFirstRest first rest xs = case xs of { Nil -> Nil ; Cons x xs -> Cons (Pair first x) (zipConst rest xs) } 361 | 362 | -------------------------------------------------------------------------------- 363 | -- ** Characters 364 | 365 | singleQuoteC = chr 39 366 | doubleQuoteC = chr 34 367 | backslashC = chr 92 368 | newlineC = chr 10 369 | carriageReturnC = chr 13 370 | 371 | isDigit :: Char -> Bool 372 | isDigit ch = and (ge k 48) (le k 57) where { k = ord ch } 373 | 374 | mbDigit :: Char -> Maybe Int 375 | mbDigit ch = ifte (isDigit ch) (Just (minus (ord ch) 48)) Nothing 376 | 377 | isUpper :: Char -> Bool 378 | isUpper ch = and (ge k 65) (le k 90) where { k = ord ch } 379 | 380 | isLower :: Char -> Bool 381 | isLower ch = and (ge k 97) (le k 122) where { k = ord ch } 382 | 383 | isAlpha ch = or (isUpper ch) (isLower ch) 384 | isAlphaNum ch = or (isAlpha ch) (isDigit ch) 385 | isLower_ ch = or (ceq ch '_') (isLower ch) 386 | 387 | -------------------------------------------------------------------------------- 388 | -- ** Strings 389 | 390 | -- type String = List Char 391 | 392 | stringEq :: String -> String -> Bool 393 | stringEq a b = go a b where { go str1 str2 = case str1 of 394 | { Nil -> case str2 of { Nil -> True ; _ -> False } 395 | ; Cons x xs -> case str2 of { Nil -> False ; Cons y ys -> and (ceq x y) (go xs ys) }}} 396 | 397 | stringMember :: String -> List String -> Bool 398 | stringMember what ls = case ls of { Nil -> False ; Cons this rest -> case stringEq what this of 399 | { True -> True ; False -> stringMember what rest }} 400 | 401 | charToString :: Char -> String 402 | charToString x = Cons x Nil 403 | 404 | showBool :: Bool -> String 405 | showBool b = case b of { True -> "True" ; False -> "False" } 406 | 407 | showChar :: Char -> String 408 | showChar c = Cons singleQuoteC (Cons c (Cons singleQuoteC Nil)) 409 | 410 | showString :: String -> String 411 | showString s = doubleQuoteString s 412 | 413 | showNat :: Int -> String 414 | showNat n = ifte (lt n 0) (error "showNat: negative") (worker n) where 415 | { worker n = ifte (eq n 0) "0" (reverse (go n)) 416 | ; go n = ifte (eq n 0) Nil (Cons (chr (plus (mod n 10) 48)) (go (div n 10))) 417 | } 418 | 419 | showInt :: Int -> String 420 | showInt n = ifte (ge n 0) (showNat n) (Cons '-' (showNat (negate n))) 421 | 422 | appendInt :: String -> Int -> String 423 | appendInt s n = append s (showInt n) 424 | 425 | readNat :: String -> Maybe Int 426 | readNat str = case str of { Nil -> Nothing ; Cons x xs -> go (reverse str) } where 427 | { go ds = case ds of { Nil -> Just 0 ; Cons x xs -> case mbDigit x of 428 | { Just k -> case go xs of { Just n -> Just (plus k (times n 10)) ; Nothing -> Nothing } 429 | ; Nothing -> Nothing } } } 430 | 431 | readInt :: String -> Maybe Int 432 | readInt str = case str of 433 | { Nil -> Nothing 434 | ; Cons x xs -> ifte (ceq x '-') (mbfmap negate (readNat xs)) (readNat str) } 435 | 436 | quoteChar :: Char -> String 437 | quoteChar x = Cons singleQuoteC (Cons x (Cons singleQuoteC Nil)) 438 | 439 | delimString :: Char -> Char -> String -> String 440 | delimString l r xs = Cons l (append xs (Cons r Nil)) 441 | 442 | backslashEn :: String 443 | backslashEn = [ backslashC , 'n' ] 444 | 445 | backslashDoubleQuote :: String 446 | backslashDoubleQuote = [ backslashC , '"' ] 447 | 448 | doubleQuoteString :: String -> String 449 | doubleQuoteString str = delimString doubleQuoteC doubleQuoteC str 450 | 451 | escapedDoubleQuoteString :: String -> String 452 | escapedDoubleQuoteString str = append3 backslashDoubleQuote str backslashDoubleQuote 453 | 454 | doubleQuoteStringLn :: String -> String 455 | doubleQuoteStringLn str = delimString doubleQuoteC doubleQuoteC (append str backslashEn) 456 | 457 | quoteString :: String -> String 458 | quoteString what= delimString '`' '`' what 459 | 460 | parenString :: String -> String 461 | parenString what = delimString '(' ')' what 462 | 463 | intercalate :: List a -> List (List a) -> List a 464 | intercalate sep llist = go llist where 465 | { go xss = case xss of 466 | { Nil -> Nil ; Cons ys yss -> case yss of 467 | { Nil -> ys 468 | ; _ -> append ys (append sep (go yss)) } } } 469 | 470 | unwords :: List String -> String 471 | unwords list = intercalate (Cons ' ' Nil) list 472 | 473 | unlines :: List String -> String 474 | unlines list = intercalate (Cons newlineC Nil) list 475 | 476 | lines :: String -> List String 477 | lines xs = case xs of { Nil -> Nil ; Cons _ _ -> case span (\x -> cneq x newlineC) xs of 478 | { Pair this rest -> case rest of { Nil -> Cons this Nil ; Cons _ ys -> Cons this (lines ys) } } } 479 | 480 | -------------------------------------------------------------------------------- 481 | -- ** IO Monad 482 | 483 | -- data ActionToken = ActionToken 484 | -- type IO a = ActionToken -> a 485 | 486 | ioret_ :: IO Unit 487 | ioret_ = ioreturn Unit 488 | 489 | iofmap :: (a -> b) -> IO a -> IO b 490 | iofmap f action = iobind action (\x -> ioreturn (f x)) 491 | 492 | iojoin :: IO (IO a) -> IO a 493 | iojoin producer = iobind producer id 494 | 495 | iocompose :: (b -> IO c) -> (a -> IO b) -> a -> IO c 496 | iocompose g f = \x -> iobind (f x) g 497 | 498 | ioliftA2 :: (a -> b -> c) -> IO a -> IO b -> IO c 499 | ioliftA2 f act1 act2 = iobind act1 (\x -> iobind act2 (\y -> ioreturn (f x y))) 500 | 501 | ioseq :: IO a -> IO b -> IO b 502 | ioseq f g = iobind f (\_ -> g) 503 | 504 | iosequence_ :: List (IO a) -> IO Unit 505 | iosequence_ list = case list of { Nil -> ioret_ ; Cons a as -> ioseq a (iosequence_ as) } 506 | 507 | iomapM :: (a -> IO b) -> List a -> IO (List b) 508 | iomapM f list = case list of { Nil -> ioreturn Nil ; Cons x xs -> ioliftA2 Cons (f x) (iomapM f xs) } 509 | 510 | ioforM :: List a -> (a -> IO b) -> IO (List b) 511 | ioforM list f = iomapM f list 512 | 513 | iomapM_ :: (a -> IO b) -> List a -> IO Unit 514 | iomapM_ f list = case list of { Nil -> ioreturn Unit ; Cons x xs -> ioseq (f x) (iomapM_ f xs) } 515 | 516 | ioforM_ :: List a -> (a -> IO b) -> IO Unit 517 | ioforM_ list f = iomapM_ f list 518 | 519 | getArgs :: IO (List String) 520 | getArgs = go 0 where { go k = iobind (getArg k) (\mb -> case mb of 521 | { Nothing -> ioreturn Nil 522 | ; Just this -> iobind (go (inc k)) (\rest -> ioreturn (Cons this rest)) })} 523 | 524 | putStr :: String -> IO Unit 525 | putStr s = hPutStr stdout s 526 | 527 | putStrLn :: String -> IO Unit 528 | putStrLn str = ioseq (putStr str) (putChar (chr 10)) 529 | 530 | type FilePath = String 531 | 532 | hGetContents :: Handle -> IO String 533 | hGetContents h = go h where { go h = iobind (hGetChar h) (\mb -> case mb of 534 | { Nothing -> ioreturn Nil 535 | ; Just y -> iobind (go h) (\ys -> ioreturn (Cons y ys)) }) } 536 | 537 | hPutStrLn :: Handle -> String -> IO Unit 538 | hPutStrLn h str = ioseq (hPutStr h str) (hPutChar h (chr 10)) 539 | 540 | withFile :: FilePath -> IOMode -> (Handle -> IO a) -> IO a 541 | withFile fn mode action = 542 | iobind (openFile fn mode) (\handle -> 543 | iobind (action handle) (\result -> 544 | iobind (hClose handle) (\_ -> ioreturn result))) 545 | 546 | readFile :: FilePath -> IO String 547 | readFile fn = withFile fn ReadMode hGetContents 548 | 549 | writeFile :: FilePath -> String -> IO Unit 550 | writeFile fn text = withFile fn WriteMode (\h -> hPutStr h text) 551 | 552 | writeLines :: FilePath -> List String -> IO Unit 553 | writeLines fn ls = withFile fn WriteMode (\h -> iomapM_ (hPutStrLn h) ls) 554 | 555 | -------------------------------------------------------------------------------- 556 | -- ** State monad 557 | 558 | type State s a = s -> Pair s a 559 | 560 | runState :: State s a -> s -> Pair s a 561 | evalState :: State s a -> s -> a 562 | execState :: State s a -> s -> s 563 | runState f = f 564 | evalState f = compose snd f 565 | execState f = compose fst f 566 | 567 | sreturn :: a -> State s a 568 | sreturn x = \s -> Pair s x 569 | 570 | sfmap :: (a -> b) -> State s a -> State s b 571 | sfmap f action = \s -> case action s of { Pair s' x -> Pair s' (f x) } 572 | 573 | sliftA2 :: (a -> b -> c) -> State s a -> State s b -> State s c 574 | sliftA2 f action1 action2 = \s -> case action1 s of { Pair s' x -> 575 | case action2 s' of { Pair s'' y -> Pair s'' (f x y) } } 576 | 577 | sbind :: State s a -> (a -> State s b) -> State s b 578 | sbind f u = \s -> case f s of { Pair s' x -> u x s' } 579 | 580 | sseq :: State s a -> State s b -> State s b 581 | sseq p q = sbind p (\_ -> q) 582 | 583 | sseq3 :: State s a -> State s b -> State s c -> State s c 584 | sseq3 p q r = sseq p (sseq q r) 585 | 586 | sseq4 :: State s a -> State s b -> State s c -> State s d -> State s d 587 | sseq4 p q r s = sseq p (sseq q (sseq r s)) 588 | 589 | sget :: State s s 590 | sget = \s -> Pair s s 591 | 592 | sput :: s -> State s Unit 593 | sput s = \_ -> Pair s Unit 594 | 595 | smodify :: (s -> s) -> State s Unit 596 | smodify f = \old -> Pair (f old) Unit 597 | 598 | swhen :: Bool -> State s Unit -> State s Unit 599 | swhen b action = ifte b action (sreturn Unit) 600 | 601 | ssequence_ :: List (State s a) -> State s Unit 602 | ssequence_ actions = case actions of { Nil -> sreturn Unit ; Cons u us -> sseq u (ssequence_ us) } 603 | 604 | smapM :: (a -> State s b) -> List a -> State s (List b) 605 | smapM f list = case list of { Nil -> sreturn Nil ; Cons x xs -> sliftA2 Cons (f x) (smapM f xs) } 606 | 607 | sforM :: List a -> (a -> State s b) -> State s (List b) 608 | sforM list f = smapM f list 609 | 610 | smapM_ :: (a -> State s b) -> List a -> State s Unit 611 | smapM_ f list = case list of { Nil -> sreturn Unit ; Cons x xs -> sseq (f x) (smapM_ f xs) } 612 | 613 | sforM_ :: List a -> (a -> State s b) -> State s Unit 614 | sforM_ list f = smapM_ f list 615 | 616 | -------------------------------------------------------------------------------- 617 | -------------------------------------------------------------------------------- /Closure.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Closure conversion 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module Closure where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | 18 | import Base 19 | import Containers 20 | import Types 21 | import PrimOps 22 | -- import DataCon 23 | -- import Syntax 24 | import Core 25 | import ScopeCheck 26 | 27 | {-% include "Base.hs" %-} 28 | {-% include "Containers.hs" %-} 29 | {-% include "Types.hs" %-} 30 | {-% include "PrimOps.hs" %-} 31 | -- {-% include "DataCon.hs" %-} 32 | -- {-% include "Syntax.hs" %-} 33 | {-% include "Core.hs" %-} 34 | {-% include "ScopeCheck.hs" %-} 35 | 36 | -------------------------------------------------------------------------------- 37 | -- ** Types 38 | 39 | -- pattern VarF nvar = AtmF (VarA nvar) 40 | -- pattern ConF ncon = AtmF (ConA ncon) 41 | -- pattern KstF lit = AtmF (KstA lit ) 42 | 43 | -- | We lift all lambdas (and lets, and branch right hand sides) to top-level. 44 | -- Note: In @LetF@, the Bool is there to note whether the bound expression needs to 45 | -- be evaluated immediately. 46 | data Lifted 47 | = AtmF Atom 48 | | AppF Lifted Atom 49 | | PriF PrimOp (List Atom ) 50 | | LZPF PrimOp (List Lifted) 51 | | CasF LAtom (List BranchF) 52 | | LamF ClosureF 53 | | RecF Int (List (Named ClosureF)) Lifted 54 | | LetF Bool ClosureF Lifted 55 | deriving Show 56 | 57 | data BranchF 58 | = BranchF (Named Con) Int ClosureF 59 | | DefaultF ClosureF 60 | deriving Show 61 | 62 | isDefaultF :: BranchF -> Bool 63 | isDefaultF branch = case branch of { DefaultF _ -> True ; _ -> False } 64 | 65 | hasDefaultF :: List BranchF -> Bool 66 | hasDefaultF ls = case ls of { Nil -> False ; Cons x xs -> case isDefaultF x of { True -> True ; _ -> hasDefaultF xs }} 67 | 68 | -- | When allocating a closure, we create a new local environment 69 | -- by pruning the current environment. We also remember the number 70 | -- of remaining arguments (0 = thunk) 71 | data ClosureF = ClosureF ClosureBody (List Level) Arity deriving Show 72 | 73 | data ClosureBody 74 | = StaticBody Static 75 | | InlineBody Lifted 76 | deriving Show 77 | 78 | closureIndex :: ClosureF -> Static 79 | closureIndex c = case c of { ClosureF b _ _ -> case b of { StaticBody s -> s ; _ -> error "closureIndex" } } 80 | 81 | closureArity :: ClosureF -> Arity 82 | closureArity c = case c of { ClosureF _ _ a -> a } 83 | 84 | closureEnv :: ClosureF -> List Level 85 | closureEnv c = case c of { ClosureF _ e _ -> e } 86 | 87 | closureEnvSize :: ClosureF -> Int 88 | closureEnvSize c = case c of { ClosureF _ e _ -> length e } 89 | 90 | -- | A static function is an arity (which is separated to environment 91 | -- size + actual lambda arity) together with a lifted body 92 | data StatFun = StatFun Arity Arity Lifted deriving Show 93 | 94 | statFunTotalArity :: StatFun -> Int 95 | statFunTotalArity sfun = case sfun of { StatFun envsize arity _lifted -> plus envsize arity } 96 | 97 | -- | A top-level definition is a name, a static index and a static function 98 | data TopLev = TopLev (Named Static) StatFun deriving Show 99 | 100 | topLevTotalArity :: TopLev -> Int 101 | topLevTotalArity toplev = case toplev of { TopLev _nidx sfun -> statFunTotalArity sfun } 102 | 103 | -------------------------------------------------------------------------------- 104 | 105 | recogLamsT :: Term -> Pair (List Name) Term 106 | recogLamsT term = case term of 107 | { LamT namedBody -> case namedBody of { Named name body -> 108 | case recogLamsT body of { Pair names rhs -> Pair (Cons name names) rhs } } 109 | ; _ -> Pair Nil term } 110 | 111 | recogAppsF :: Lifted -> Pair Lifted (List Atom) 112 | recogAppsF term = case term of 113 | { AppF tm1 v2 -> case recogAppsF tm1 of { Pair f args -> Pair f (snoc args v2) } 114 | ; _ -> Pair term Nil } 115 | 116 | -- recogLetsT :: Term -> Pair (List Term) Term 117 | -- recogLetsT term = case term of 118 | -- { LetT t1 t2 -> case recogLetsT t2 of { Pair ts body -> Pair (Cons t1 ts) body } 119 | -- ; _ -> Pair Nil term } 120 | 121 | -------------------------------------------------------------------------------- 122 | -- ** Closure converting programs 123 | 124 | -- | The (named) int list is the mapping from the source code top-level functions 125 | -- to the generated code top-level functions 126 | data LiftedProgram = LProgram 127 | { _toplevs :: List TopLev 128 | , _indices :: List (Named Int) 129 | , _main :: Pair Int Lifted } 130 | deriving Show 131 | 132 | coreProgramToLifted :: CoreProgram -> LiftedProgram 133 | coreProgramToLifted coreprg = case coreprg of { CorePrg blocks _mainIdx mainTerm -> let 134 | { nstatic = length defins 135 | ; defins = forgetBlockStructure blocks 136 | ; action1 = sforM defins (\defin -> case located defin of { Defin name term -> 137 | sfmap (\i -> Named name (closureIndex i)) (termToStaticClosure name idSubs 0 term) }) 138 | ; action2 = closureConvert nanoMainIs idSubs 0 mainTerm 139 | ; action = sliftA2 Pair action1 action2 140 | ; mainidx = case findIndex (\def -> stringEq nanoMainIs (ldefinedName def)) defins of { Just i -> i ; Nothing -> error "main not found" } 141 | } in case runState action Nil of { Pair toplist pair2 -> 142 | case pair2 of { Pair idxlist mainlft -> LProgram (reverse toplist) idxlist (Pair mainidx mainlft) } } } 143 | 144 | -------------------------------------------------------------------------------- 145 | -- ** Subsitutions and pruning 146 | 147 | -- | Partial subsitutions 148 | type Subs = (Level -> Maybe Level) 149 | 150 | idSubs :: Subs 151 | idSubs = \j -> ifte (lt j 0) Nothing (Just j) 152 | 153 | composeSubs :: Subs -> Subs -> Subs 154 | composeSubs subs1 subs2 = \j -> case subs1 j of { Nothing -> Nothing ; Just k -> subs2 k } 155 | 156 | -- | Substitution which flips (reverses) the first @n@ things 157 | flipSubs :: Int -> Subs 158 | flipSubs n = \j -> ifte (lt j 0) Nothing (Just (ifte (lt j n) (minus n (inc j)) j)) 159 | 160 | -- | Apply a substitution 161 | applySubs :: Subs -> Level -> Level 162 | applySubs subs j = case subs j of { Just k -> k ; _ -> error "substitution: input not in domain" } 163 | 164 | type PrunedEnv = List Level 165 | 166 | pruningSubs :: Subs -> Level -> Level -> Term -> Pair PrunedEnv Subs 167 | pruningSubs oldsubs boundary level term = Pair pruned subs where 168 | { pruned0 = pruneEnvironment boundary level term 169 | ; pruned = map (applySubs oldsubs) pruned0 170 | ; npruned = length pruned 171 | ; envtable = zip pruned (range npruned) 172 | ; subs j = ifte (lt j boundary) (mapLookup (applySubs oldsubs j) envtable) 173 | (Just (plus (minus j boundary) npruned)) 174 | } 175 | 176 | -- | Figure out the part of the environment used by a term. 177 | -- Returns a set of /levels/ 178 | pruneEnvironment :: Level -> Level -> Term -> IntSet 179 | pruneEnvironment boundary level term = go level term where 180 | { goAtom level atom = case atom of 181 | { VarA nvar -> goVar level (forgetName nvar) 182 | ; ConA _ -> setEmpty 183 | ; KstA _ -> setEmpty } 184 | ; goVar level var = case var of 185 | { IdxV idx -> goLevel (minus level (inc idx)) 186 | ; LevV jdx -> goLevel jdx 187 | ; TopV _ -> setEmpty 188 | ; StrV _ -> setEmpty } 189 | ; goLevel j = ifte (lt j boundary) (setSingleton j) setEmpty 190 | ; go level term = case term of 191 | { AtmT atom -> goAtom level atom 192 | ; AppT t1 a2 -> setUnion (go level t1) (goAtom level a2) 193 | ; PriT _ args -> setUnions (map (goAtom level) args) 194 | ; LZPT _ args -> setUnions (map (go level) args) 195 | ; LamT nbody -> go (inc level) (forgetName nbody) 196 | ; RecT n ts t -> let { level' = plus level n } in setUnions (Cons (go level' t) (map (\t -> go level' (forgetName t)) ts)) 197 | ; LetT nt1 t2 -> setUnion (go level (forgetName nt1)) (go (inc level) t2) 198 | ; CasT v brs -> setUnion (goAtom level (located v)) (setUnions (map (goBranch level) brs)) } 199 | ; goBranch level branch = case branch of 200 | { BranchT _ n rhs -> go (plus level n) rhs 201 | ; DefaultT rhs -> go level rhs } } 202 | 203 | -------------------------------------------------------------------------------- 204 | -- ** Closure conversion 205 | 206 | -- | The closure conversion monad. Note: the list is reverse order! 207 | type ClosM a = State (List TopLev) a 208 | 209 | -- | Take the head entry of top level lists, and add 1 to it's index 210 | nextStatic :: ClosM Static 211 | nextStatic = sbind sget (\list -> case list of { Nil -> sreturn 0 ; Cons toplev _ -> 212 | case toplev of { TopLev named _ -> case named of { Named name s -> sreturn (inc s) }}}) 213 | 214 | -- addTopLev :: TopLev -> ClosM Unit 215 | -- addTopLev what = smodify (Cons what) 216 | 217 | addStatFun :: Name -> StatFun -> ClosM Static 218 | addStatFun name statfun = 219 | sbind nextStatic (\statidx -> 220 | sbind (smodify (Cons (TopLev (Named name statidx) statfun))) (\_ -> sreturn statidx )) 221 | 222 | -- | The closure environment maps original levels to pruned de Bruijn indices 223 | -- relative to the boundary 224 | type ClosEnv = Map Level Idx 225 | 226 | -- | A multi-lambda 227 | data LambdaT 228 | = LambdaT Int Term 229 | deriving Show 230 | 231 | -- | Closure-converting a lambda results in 1) a closure allocation 232 | -- and 2) a static function. The latter we just add to the list of 233 | -- of compiled stuff 234 | -- 235 | -- The name is just a name for the resulting top-level definition 236 | -- (so debugging is easier), and the level is the level where the lambda 237 | -- starts (the \"boundary\" level) 238 | -- 239 | lambdaToClosure :: Name -> Subs -> Level -> LambdaT -> ClosM ClosureF 240 | lambdaToClosure name oldsubs boundary lambda = case lambda of { LambdaT nargs body -> 241 | let { newlevel = plus boundary nargs } in 242 | case pruningSubs oldsubs boundary newlevel body of { Pair prunedEnv newsubs -> 243 | let { npruned = length prunedEnv ; ntotal = plus npruned nargs 244 | ; newsubs' = composeSubs newsubs (flipSubs ntotal) } 245 | in sbind (closureConvert name newsubs' newlevel body) (\statbody -> 246 | sbind (addStatFun name (StatFun npruned nargs statbody)) (\statidx -> 247 | sreturn (ClosureF (StaticBody statidx) prunedEnv nargs))) }} 248 | 249 | termToStaticClosure :: Name -> Subs -> Level -> Term -> ClosM ClosureF 250 | termToStaticClosure name subs level tm = case recogLamsT tm of { Pair args body -> 251 | lambdaToClosure name subs level (LambdaT (length args) body) } 252 | 253 | -- Note: we don't do environment pruning here (boundary = 0) 254 | termToInlineClosure :: Name -> Subs -> Level -> Term -> ClosM ClosureF 255 | termToInlineClosure name subs level tm = 256 | sbind (closureConvert name subs level tm) (\lifted -> 257 | sreturn (ClosureF (InlineBody lifted) Nil 0)) 258 | 259 | -- | Whether to make a static function out of this closure 260 | doInlineClosure :: Term -> Bool 261 | doInlineClosure tm = case tm of 262 | { LamT _ -> False 263 | ; AtmT _ -> True 264 | ; _ -> le (termSize tm) 64 } 265 | 266 | -- | In let bindings, it can happen that we bind some computation. 267 | -- Since on the stack there should be only values, in this case 268 | -- we have to evaluate them. On the other hand, for example lambdas and 269 | -- atoms do not need to be evaluated. 270 | needsToBeEvaluated :: Term -> Bool 271 | needsToBeEvaluated tm = case tm of 272 | { LamT _ -> False 273 | ; AtmT _ -> False 274 | ; LetT _ body -> needsToBeEvaluated body 275 | ; RecT _ _ body -> needsToBeEvaluated body 276 | ; _ -> True } 277 | 278 | termToClosure :: Name -> Subs -> Level -> Term -> ClosM ClosureF 279 | termToClosure name subs level term = ifte (doInlineClosure term) 280 | (termToInlineClosure name subs level term) 281 | (termToStaticClosure name subs level term) 282 | 283 | addPrefix :: Name -> Name -> Name 284 | addPrefix prefix name = append3 prefix "/" name 285 | 286 | closureConvert :: Name -> Subs -> Level -> Term -> ClosM Lifted 287 | closureConvert nameprefix subs level term = go level term where 288 | { prefixed name = addPrefix nameprefix name 289 | -- convert to de Bruijn levels from indices 290 | ; goLev jdx = LevV (applySubs subs jdx ) 291 | ; goIdx level idx = LevV (applySubs subs (minus level (inc idx))) 292 | ; goAtom level atom = case atom of 293 | { VarA named -> case named of { Named name var -> case var of 294 | { IdxV idx -> VarA (Named name (goIdx level idx)) 295 | ; LevV jdx -> VarA (Named name (goLev jdx)) 296 | ; _ -> atom } } 297 | ; _ -> atom } 298 | ; go level term = case term of 299 | { AtmT named -> sreturn (AtmF (goAtom level named)) 300 | ; AppT t1 a2 -> sliftA2 AppF (go level t1) (sreturn (goAtom level a2)) 301 | ; PriT pri args -> sreturn (PriF pri ( map (goAtom level) args)) 302 | ; LZPT pri args -> sfmap (LZPF pri) (smapM (go level) args) 303 | ; LamT _ -> case recogLamsT term of { Pair args body -> 304 | sfmap LamF (lambdaToClosure (prefixed "lambda") subs level (LambdaT (length args) body)) } 305 | ; CasT lv brs -> sfmap (CasF (lfmap (goAtom level) lv)) (smapM (goBranch level) brs) 306 | ; RecT n nts tm -> let { level' = plus level n } 307 | in sliftA2 (RecF n) (smapM (goRec1 level') nts) (go level' tm) 308 | ; LetT ntm body -> case ntm of { Named name tm -> sliftA2 (LetF (needsToBeEvaluated tm)) 309 | (termToClosure (prefixed name) subs level tm) (go (inc level) body) } } 310 | ; goRec1 level named = case named of { Named name tm -> 311 | sfmap (Named name) (termToStaticClosure (prefixed name) subs level tm) } 312 | ; goBranch level br = case br of 313 | { DefaultT rhs -> sfmap (DefaultF ) (termToClosure (prefixed "default") subs level rhs ) 314 | ; BranchT named n rhs -> sfmap (BranchF named n) (lambdaToClosure (prefixed (nameOf named)) subs level (LambdaT n rhs)) 315 | }} 316 | 317 | -------------------------------------------------------------------------------- 318 | -------------------------------------------------------------------------------- /Containers.hs: -------------------------------------------------------------------------------- 1 | -- | Containers 2 | 3 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 4 | {-# LANGUAGE Strict #-} 5 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 6 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 7 | 8 | module Containers where 9 | 10 | -------------------------------------------------------------------------------- 11 | 12 | import Prelude ( Int , Char , Eq , Show ) 13 | import PrimGHC 14 | 15 | -------------------------------------------------------------------------------- 16 | 17 | import Base 18 | 19 | {-% include "Base.hs" %-} 20 | 21 | -------------------------------------------------------------------------------- 22 | -- * Containers 23 | -- ** Sets 24 | 25 | -- | We model sets as sorted lists. We would need an Ord instance so we only 26 | -- do it for Int-s. 27 | type IntSet = List Int 28 | 29 | setEmpty :: IntSet 30 | setEmpty = Nil 31 | 32 | setMember :: Int -> IntSet -> Bool 33 | setMember k set = elem k set 34 | 35 | setSingleton :: Int -> IntSet 36 | setSingleton x = Cons x Nil 37 | 38 | setInsert :: Int -> IntSet -> IntSet 39 | setInsert k set = go set where 40 | { go set = case set of { Nil -> Cons k Nil ; Cons x xs -> case compare k x of 41 | { LT -> Cons k set ; EQ -> set ; GT -> Cons x (go xs) } } } 42 | 43 | setDelete :: Int -> IntSet -> IntSet 44 | setDelete k = go where 45 | { go set = case set of { Nil -> Nil ; Cons x xs -> case compare k x of 46 | { LT -> set ; EQ -> xs ; GT -> Cons x (go xs) } } } 47 | 48 | setInsertMany :: List Int -> IntSet -> IntSet 49 | setInsertMany ks set = foldl (flip setInsert) set ks 50 | 51 | setDeleteMany :: List Int -> IntSet -> IntSet 52 | setDeleteMany ks set = foldl (flip setDelete) set ks 53 | 54 | setIntersect :: IntSet -> IntSet -> IntSet 55 | setIntersect set1 set2 = filter (\x -> setMember x set2) set1 56 | 57 | setIsDisjoint :: IntSet -> IntSet -> Bool 58 | setIsDisjoint set1 set2 = andList (map (\x -> not (setMember x set2)) set1) 59 | 60 | setUnion :: IntSet -> IntSet -> IntSet 61 | setUnion set1 set2 = flipFoldr setInsert set1 set2 62 | 63 | setUnions :: List IntSet -> IntSet 64 | setUnions list = foldl setUnion setEmpty list 65 | 66 | -------------------------------------------------------------------------------- 67 | -- ** Association maps (sorted lists of (key,value) pairs) 68 | 69 | type Map k v = List (Pair k v) 70 | 71 | mapEmpty :: Map k v 72 | mapEmpty = Nil 73 | 74 | mapSingleton :: k -> v -> Map k v 75 | mapSingleton k v = Cons (Pair k v) Nil 76 | 77 | -- | This can be used for sorting. Of course it's quadratic... 78 | mapFromList :: List (Pair Int v) -> Map Int v 79 | mapFromList list = foldl g mapEmpty list where 80 | { g old pair = case pair of { Pair k v -> mapInsert k v (const v) old } } 81 | 82 | -- mapLookup :: Eq k => k -> Map k v -> Maybe v 83 | mapLookup :: Int -> Map Int v -> Maybe v 84 | mapLookup key list = go list where 85 | { go ls = case ls of 86 | { Nil -> Nothing ; Cons this rest -> case this of 87 | { Pair k v -> ifte (eq k key) (Just v) (go rest) } } } 88 | 89 | -- mapDelete :: Eq k => k -> Map k v -> Map k v 90 | mapDelete :: Int -> Map Int v -> Map Int v 91 | mapDelete key list = go list where 92 | { go kvs = case kvs of { Nil -> Nil ; Cons kv rest -> case kv of 93 | { Pair k v -> ifte (eq k key) rest (Cons kv (go rest)) } } } 94 | 95 | -- | For proper insertion we need ordering, but we only have that for Ints... 96 | mapInsert :: Int -> v -> (v -> v) -> Map Int v -> Map Int v 97 | mapInsert key y f list = go list where 98 | { go kvs = case kvs of { Nil -> Cons (Pair key y) Nil ; Cons kv rest -> case kv of 99 | { Pair k v -> case compare k key of 100 | { LT -> Cons kv (go rest) 101 | ; EQ -> Cons (Pair k (f v)) rest 102 | ; GT -> Cons (Pair key y ) kvs } } } } 103 | 104 | mapAdjust :: Int -> (v -> v) -> Map Int v -> Map Int v 105 | mapAdjust key f list = go list where 106 | { go kvs = case kvs of { Nil -> Nil ; Cons kv rest -> case kv of 107 | { Pair k v -> case compare k key of 108 | { LT -> Cons kv (go rest) 109 | ; EQ -> Cons (Pair k (f v)) rest 110 | ; GT -> kvs } } } } 111 | 112 | -------------------------------------------------------------------------------- 113 | -- ** Trie 114 | 115 | data Trie a = Node (Maybe a) (Map Int (Trie a)) deriving Show 116 | 117 | trieEmpty :: Trie a 118 | trieEmpty = Node Nothing Nil 119 | 120 | trieSingleton :: String -> a -> Trie a 121 | trieSingleton str y = case str of { Nil -> Node (Just y) mapEmpty 122 | ; Cons x xs -> Node Nothing (mapSingleton (ord x) (trieSingleton xs y)) } 123 | 124 | trieLookup :: String -> Trie a -> Maybe a 125 | trieLookup str trie = case trie of { Node mb table -> case str of { Nil -> mb 126 | ; Cons x xs -> case mapLookup (ord x) table of { Nothing -> Nothing 127 | ; Just trie' -> trieLookup xs trie' } } } 128 | 129 | trieLookupDefault :: a -> String -> Trie a -> a 130 | trieLookupDefault def str trie = case trie of { Node mb table -> case str of 131 | { Nil -> case mb of { Just y -> y ; Nothing -> def } 132 | ; Cons x xs -> case mapLookup (ord x) table of { Nothing -> def 133 | ; Just trie' -> trieLookupDefault def xs trie' } } } 134 | 135 | trieMember :: String -> Trie a -> Bool 136 | trieMember str trie = isJust (trieLookup str trie) 137 | 138 | trieInsert :: String -> a -> Trie a -> Trie a 139 | trieInsert string y trie = go string trie where 140 | { go str trie = case trie of { Node mb table -> case str of 141 | { Nil -> Node (Just y) table 142 | ; Cons x xs -> Node mb (mapInsert (ord x) (trieSingleton xs y) (go xs) table) } } } 143 | 144 | -- | throws an exception if the key already exists 145 | trieInsertNew :: String -> String -> a -> Trie a -> Trie a 146 | trieInsertNew errmsg string y trie = go string trie where 147 | { go str trie = case trie of { Node mb table -> case str of 148 | { Nil -> case mb of { Nothing -> Node (Just y) table ; _ -> error errmsg } 149 | ; Cons x xs -> Node mb (mapInsert (ord x) (trieSingleton xs y) (go xs) table) } } } 150 | 151 | trieDelete :: String -> Trie a -> Trie a 152 | trieDelete str trie = case trie of { Node mb table -> case str of 153 | { Nil -> Node Nothing table 154 | ; Cons x xs -> Node mb (mapAdjust (ord x) (trieDelete xs) table) } } 155 | 156 | trieAlter :: (Maybe a -> Maybe a) -> String -> Trie a -> Trie a 157 | trieAlter f string trie = case f (trieLookup string trie) of 158 | { Nothing -> trieDelete string trie 159 | ; Just y -> trieInsert string y trie } 160 | 161 | trieUnion :: Trie a -> Trie a -> Trie a 162 | trieUnion trie1 trie2 = go trie2 (trieToList trie1) where 163 | { go trie list = case list of { Nil -> trie ; Cons pair rest -> case pair of { Pair key val -> 164 | go (trieInsert key val trie) rest }}} 165 | 166 | trieUnions :: List (Trie a) -> Trie a 167 | trieUnions list = case list of { Nil -> trieEmpty ; Cons this rest -> case rest of 168 | { Nil -> this ; _ -> trieUnion this (trieUnions rest) }} 169 | 170 | trieFromList :: List (Pair String a) -> Trie a 171 | trieFromList list = foldr f trieEmpty list where { f kv trie = case kv of { Pair k v -> trieInsert k v trie } } 172 | 173 | -- | throws an exception if there is a duplicate key 174 | trieFromListUnique :: (String -> String) -> List (Pair String a) -> Trie a 175 | trieFromListUnique errmsg list = foldr f trieEmpty list where { f kv trie = case kv of { Pair k v -> trieInsertNew (errmsg k) k v trie } } 176 | 177 | trieBuild :: (a -> b) -> (a -> b -> b) -> List (Pair String a) -> Trie b 178 | trieBuild f g xys = foldl insert trieEmpty xys where 179 | { insert old pair = case pair of { Pair k x -> trieAlter (h x) k old } 180 | ; h x mb = case mb of { Nothing -> Just (f x) ; Just y -> Just (g x y) } } 181 | 182 | trieToList :: Trie a -> List (Pair String a) 183 | trieToList trie = go trie where { go trie = case trie of { Node mb table -> let 184 | { f pair = case pair of { Pair k trie' -> map (prepend (chr k)) (go trie') } 185 | ; rest = concat (map f table) 186 | ; prepend x pair = case pair of { Pair xs y -> Pair (Cons x xs) y } } 187 | in case mb of { Nothing -> rest ; Just y -> Cons (Pair Nil y) rest } } } 188 | 189 | trieKeys :: Trie a -> List String 190 | trieKeys trie = map fst (trieToList trie) 191 | 192 | -------------------------------------------------------------------------------- 193 | -- ** Sets of strings 194 | 195 | type TrieSet = Trie Unit 196 | 197 | trieSetSingleton :: String -> TrieSet 198 | trieSetSingleton key = trieSingleton key Unit 199 | 200 | trieSetInsert :: String -> TrieSet -> TrieSet 201 | trieSetInsert key set = trieInsert key Unit set 202 | 203 | trieSetFromList :: List String -> TrieSet 204 | trieSetFromList list = trieFromList (map (\k -> Pair k Unit) list) 205 | 206 | trieSetToList :: TrieSet -> List String 207 | trieSetToList set = map fst (trieToList set) 208 | 209 | -------------------------------------------------------------------------------- 210 | -------------------------------------------------------------------------------- /Core.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Core language 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module Core where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | 18 | import Base 19 | import Types 20 | import PrimOps 21 | 22 | {-% include "Base.hs" %-} 23 | {-% include "Types.hs" %-} 24 | {-% include "PrimOps.hs" %-} 25 | 26 | -------------------------------------------------------------------------------- 27 | -- ** Terms 28 | 29 | -- pattern VarT nvar = AtmT (VarA nvar) 30 | -- pattern ConT ncon = AtmT (ConA ncon) 31 | -- pattern KstT lit = AtmT (KstA lit ) 32 | 33 | type LTerm = Located Term 34 | 35 | data Term 36 | = AtmT Atom 37 | | LamT (Named Term) 38 | | AppT Term Atom 39 | | PriT PrimOp (List Atom) 40 | | LZPT PrimOp (List Term) 41 | | LetT (Named Term) Term 42 | | RecT Int (List (Named Term)) Term 43 | | CasT LAtom (List BranchT) 44 | | MainT 45 | deriving Show 46 | 47 | isLambdaTerm :: Term -> Bool 48 | isLambdaTerm term = case term of { LamT _ -> True ; _ -> False } 49 | 50 | data BranchT 51 | = BranchT (Named Con) Int Term 52 | | DefaultT Term 53 | deriving Show 54 | 55 | -- | A list of top-level terms and the main (index and term, even though it's redundant) 56 | data CoreProgram 57 | = CorePrg (Program Term) TopIdx Term 58 | deriving Show 59 | 60 | -------------------------------------------------------------------------------- 61 | 62 | showAtom :: Atom -> String 63 | showAtom a = prettyAtom a 64 | 65 | showTerm :: Term -> String 66 | showTerm term = case term of 67 | { AtmT atom -> showAtom atom 68 | ; LamT nbody -> case nbody of { Named name body -> concat [ "(" , singleton backslashC , name , " -> " , showTerm body , ")" ] } 69 | ; AppT t a -> concat [ "(" , showTerm t , " " , showAtom a , ")" ] 70 | ; PriT pop args -> case pop of { PrimOp _ p -> concat [ showPrim p , "(" , intercalate "," (map showAtom args) , ")" ]} 71 | ; LZPT pop args -> case pop of { PrimOp _ p -> concat [ showPrim p , "(" , intercalate "," (map showTerm args) , ")" ]} 72 | ; LetT nt body -> concat [ "[let " , showNamedTerm nt , " in " , showTerm body , "]" ] 73 | ; RecT _ nts body -> concat [ "letrec { " , intercalate " ; " (map showNamedTerm nts) , " } in " , showTerm body ] 74 | ; CasT la brs -> concat [ "case " , showAtom (located la) , " of { " ,intercalate " ; " (map showBranchT brs) , " }" ] 75 | ; MainT -> "
" } 76 | 77 | showTermList :: List Term -> String 78 | showTermList tms = append3 "[" (intercalate "," (map showTerm tms)) "]" 79 | 80 | showNamedTerm :: Named Term -> String 81 | showNamedTerm nterm = case nterm of { Named n t -> append3 n " = " (showTerm t) } 82 | 83 | showBranchT :: BranchT -> String 84 | showBranchT branch = case branch of 85 | { BranchT ncon n rhs -> concat [ nameOf ncon , " " , intercalate " " (map (\i -> appendInt "a" i) (rangeFrom 1 n)) , " -> " , showTerm rhs ] 86 | ; DefaultT rhs -> append "_ -> " (showTerm rhs) } 87 | 88 | -------------------------------------------------------------------------------- 89 | 90 | programToTerm :: Program Term -> Term 91 | programToTerm blocks = go 0 blocks where 92 | { go k blocks = case blocks of { Nil -> MainT ; Cons this rest -> case this of 93 | { Recursive defins -> let { n = length defins ; k' = plus k n} in 94 | RecT n (map (worker k') defins) (go k' rest) 95 | ; NonRecursive defin -> LetT ( worker k defin ) (go (inc k) rest) }} 96 | ; worker level0 ldefin = case ldefin of { Located loc defin -> case defin of 97 | { Defin name term -> Named name (transformVars f level0 term) }} 98 | where { f _level var = case var of { IdxV i -> IdxV i 99 | ; LevV j -> LevV (plus level0 j) 100 | ; TopV k -> LevV k 101 | ; _ -> var }}} 102 | 103 | -- | it's important to eliminate levels because they can point to top-level definitions...? 104 | -- TODO: to be really correct, we should recognize top-level definitions... 105 | termToProgram :: Term -> Program Term 106 | termToProgram term = go 0 term where 107 | { go k term = case term of 108 | { RecT n nts body -> let { k' = plus k n } in 109 | Cons (Recursive (map (worker k') nts)) (go k' body) 110 | ; LetT nt body -> Cons (NonRecursive ( worker k nt )) (go (inc k) body) 111 | ; MainT -> Nil } 112 | ; worker level0 named = case named of { Named name term -> fakeLocated (Defin name (transformVars f level0 term)) } where 113 | { f level var = case var of { IdxV i -> g level (minus level (inc i)) ; LevV j -> g level j ; _ -> var } 114 | ; g level jdx = ifte (lt jdx level0) (TopV jdx) (IdxV (minus level (inc jdx))) 115 | }} 116 | 117 | findToplevelMain :: Term -> TopIdx 118 | findToplevelMain term = go 0 term where 119 | { go k term = case term of 120 | { RecT n _ body -> go (plus k n) body 121 | ; LetT nt body -> ifte (stringEq (nameOf nt) "main") k (go (inc k) body) 122 | ; MainT -> error "findToplevelMain: top level `main` not found" }} 123 | 124 | -------------------------------------------------------------------------------- 125 | 126 | transformVars :: (Level -> Var -> Var) -> Level -> Term -> Term 127 | transformVars f = transformAtoms g where { g level atom = case atom of 128 | { VarA nvar -> case nvar of { Named name var -> VarA (Named name (f level var)) } ; _ -> atom } } 129 | 130 | transformAtoms :: (Level -> Atom -> Atom) -> Level -> Term -> Term 131 | transformAtoms f = transformTerm worker where 132 | { worker level term = case term of 133 | { AtmT a -> AtmT (f level a) 134 | ; AppT t a -> AppT t (f level a) 135 | ; PriT p as -> PriT p (map (f level) as) 136 | ; CasT a brs -> CasT (lfmap (f level) a) brs 137 | ; _ -> term } } 138 | 139 | transformTerm :: (Level -> Term -> Term) -> Level -> Term -> Term 140 | transformTerm f level term = go level term where 141 | { go level term = case term of 142 | { AtmT a -> f level (AtmT a) 143 | ; LamT nt -> f level (LamT (nfmap (go (inc level)) nt)) 144 | ; AppT t a -> f level (AppT (go level t) a) 145 | ; PriT p as -> f level (PriT p as) 146 | ; LZPT p ts -> f level (LZPT p (map (go level) ts)) 147 | ; LetT nt1 t2 -> f level (LetT (nfmap (go level) nt1) (go (inc level) t2)) 148 | ; RecT n nts t -> f level (let { level' = plus level n } in RecT n (map (nfmap (go level')) nts) (go level' t)) 149 | ; CasT la brs -> f level (CasT la (map (goBranch level) brs)) 150 | ; MainT -> f level MainT } 151 | ; goBranch level branch = case branch of 152 | { BranchT c n t -> BranchT c n (go (plus level n) t) 153 | ; DefaultT t -> DefaultT (go level t) } } 154 | 155 | -------------------------------------------------------------------------------- 156 | 157 | -- | Shift de Bruijn indices in variables. We /add/ the offset to the indices, 158 | -- leaving levels unchanged. 159 | shiftVarRight :: Int -> Var -> Var 160 | shiftVarRight ofs var = case var of 161 | { IdxV i -> IdxV (plus i ofs) 162 | ; LevV j -> LevV j 163 | ; _ -> var } 164 | 165 | -- | Shift de Bruijn indices in atoms 166 | shiftAtomRight :: Int -> Atom -> Atom 167 | shiftAtomRight ofs atom = case atom of { VarA namedVar -> VarA (nfmap (shiftVarRight ofs) namedVar) ; _ -> atom } 168 | 169 | -- | The condition should returin @True@ \"outside the term\" 170 | shiftVarConditional :: (Level -> Bool) -> Int -> Level -> Var -> Var 171 | shiftVarConditional cond ofs level var = case var of 172 | { IdxV i -> let { j = minus level (inc i) } in 173 | ifte (cond j) (IdxV (plus i ofs)) var 174 | ; LevV j -> ifte (cond j) var (LevV (plus j ofs)) 175 | ; _ -> var } 176 | 177 | shiftTerm :: Level -> Level -> Term -> Term 178 | shiftTerm oldlevel newlevel term = let { shift = minus newlevel oldlevel } in 179 | transformVars (\level var -> shiftVarConditional (\j -> lt j oldlevel) shift level var) oldlevel term 180 | 181 | -------------------------------------------------------------------------------- 182 | 183 | atomIndexToLevel :: Level -> Atom -> Atom 184 | atomIndexToLevel level atom = case atom of { VarA nvar -> case nvar of { Named name var -> case var of 185 | { IdxV i -> VarA (Named name (LevV (minus level (inc i)))) ; _ -> atom }} ; _ -> atom } 186 | 187 | atomLevelToIndex :: Level -> Atom -> Atom 188 | atomLevelToIndex level atom = case atom of { VarA nvar -> case nvar of { Named name var -> case var of 189 | { LevV j -> VarA (Named name (IdxV (minus level (inc j)))) ; _ -> atom }} ; _ -> atom } 190 | 191 | termIndicesToLevels :: Level -> Term -> Term 192 | termIndicesToLevels level term = transformAtoms atomIndexToLevel level term 193 | 194 | termLevelsToIndices :: Level -> Term -> Term 195 | termLevelsToIndices level term = transformAtoms atomLevelToIndex level term 196 | 197 | -------------------------------------------------------------------------------- 198 | 199 | termSize :: Term -> Size 200 | termSize term = go term where 201 | { goNamed named = case named of { Named _ tm -> go tm } 202 | ; go term = case term of 203 | { AtmT _ -> 1 204 | ; LamT nbody -> inc (goNamed nbody) 205 | ; AppT tm v -> inc (go tm) 206 | ; PriT _ _ -> 1 207 | ; LZPT _ ts -> inc (sum (map go ts)) 208 | ; LetT nt1 t2 -> inc (plus (goNamed nt1) (go t2)) 209 | ; RecT _ ls tm -> inc (plus (sum (map goNamed ls)) (go tm)) 210 | ; CasT _ brs -> inc (sum (map goBranch brs)) } 211 | ; goBranch br = case br of 212 | { DefaultT tm -> go tm 213 | ; BranchT _ k tm -> plus k (go tm) } } 214 | 215 | -------------------------------------------------------------------------------- 216 | -------------------------------------------------------------------------------- /DataCon.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Data constructors 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module DataCon where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | 18 | import Base 19 | import Containers 20 | import Types 21 | import Syntax 22 | 23 | {-% include "Base.hs" %-} 24 | {-% include "Containers.hs" %-} 25 | {-% include "Types.hs" %-} 26 | {-% include "Syntax.hs" %-} 27 | 28 | -------------------------------------------------------------------------------- 29 | -- ** Data constructors 30 | 31 | isDCon :: Name -> Bool 32 | isDCon name = isUpper (head name) 33 | 34 | -- -- | Mapping constructor names to constructor tags 35 | -- type DataConTable = Trie Con 36 | 37 | con_False = 0 38 | con_True = 1 39 | con_Unit = 2 40 | con_Nil = 3 41 | con_Cons = 4 42 | con_Nothing = 5 43 | con_Just = 6 44 | con_Pair = 7 45 | con_ActionToken = 8 46 | con_ReadMode = 9 47 | con_WriteMode = 10 48 | con_AppendMode = 11 49 | con_ReadWriteMode = 12 50 | 51 | type DConState = Pair Int DataConTable 52 | 53 | initialDConState :: DConState 54 | initialDConState = Pair 13 (trieFromList predefinedDataCons) 55 | 56 | predefinedDataCons :: List (Pair String Int) 57 | predefinedDataCons = 58 | [ Pair "False" con_False , Pair "True" con_True , Pair "Unit" con_Unit , Pair "Pair" con_Pair 59 | , Pair "Nil" con_Nil , Pair "Cons" con_Cons , Pair "Nothing" con_Nothing , Pair "Just" con_Just 60 | , Pair "ReadMode" con_ReadMode , Pair "WriteMode" con_WriteMode , Pair "ActionToken" con_ActionToken 61 | , Pair "AppendMode" con_AppendMode , Pair "ReadWriteMode" con_ReadWriteMode ] 62 | 63 | -- | Collect data constructors from the source. 64 | -- 65 | -- Encoding of constructors tags: 66 | -- 67 | -- * 0 = False 68 | -- * 1 = True 69 | -- * 2 = Unit 70 | -- * 3 = Nil 71 | -- * 4 = Cons 72 | -- * 5 = Nothing 73 | -- * 6 = Just 74 | -- * 7 = Pair 75 | -- * 8 = RealWorld 76 | -- * 9 = ReadMode 77 | -- * 10 = WriteMode 78 | -- * 11 = AppendMode 79 | -- * 12 = ReadWriteMode 80 | -- * 13.. = user defined constructors 81 | -- 82 | -- We need to fix @Int@, @Char@, @False@, @True@, @Unit@, @Nil@, @Cons@, @Just@, @Nothing@, @Pair@ 83 | -- and the file access modes because the primops use them 84 | -- 85 | collectDataCons :: List (Defin Expr) -> DataConTable 86 | collectDataCons defins = snd (execState action initialDConState) where 87 | { action = smapM_ collectDataConsWorker (map definedWhat defins) } 88 | 89 | collectDataConsWorker :: Expr -> State DConState Unit 90 | collectDataConsWorker expr = go expr where 91 | { insert name = sbind sget (\pair -> case pair of { Pair n table -> case trieLookup name table of 92 | { Just k -> sreturn Unit 93 | ; Nothing -> sput (Pair (inc n) (trieInsert name n table)) } }) 94 | ; go expr = case expr of 95 | { VarE lnam -> let { name = located lnam } in case isDCon name of { False -> sreturn Unit ; True -> insert name } 96 | ; StrE k -> sreturn Unit 97 | ; AppE e1 e2 -> sseq (go e1) (go e2) 98 | ; LamE _ body -> go body 99 | ; LetE defs body -> goLet defs body 100 | ; RecE defs body -> goLet defs body 101 | ; CaseE e branches -> sseq (lgo e) (smapM_ (\br -> case br of 102 | { BranchE con _ rhs -> sbind (insert con) (\_ -> go rhs) 103 | ; DefaultE rhs -> go rhs }) branches) 104 | ; LitE _ -> sreturn Unit 105 | ; ListE list -> smapM_ go list 106 | ; PrimE _ as -> smapM_ go as 107 | } 108 | ; lgo lexpr = case lexpr of { Located loc expr -> sfmap (Located loc) (go expr) } 109 | ; goLet defs body = sseq 110 | (smapM_ (\defin -> case located defin of { Defin _ rhs -> go rhs }) defs ) (go body) } 111 | 112 | -------------------------------------------------------------------------------- 113 | -------------------------------------------------------------------------------- /Dependency.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Dependency analysis: Partitioning lets into recursive and non-recursive parts 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module Dependency where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | 18 | import Base 19 | import Containers 20 | import Types 21 | -- import Core 22 | import Syntax 23 | 24 | {-% include "Base.hs" %-} 25 | {-% include "Containers.hs" %-} 26 | {-% include "Types.hs" %-} 27 | -- {-% include "Core.hs" %-} 28 | {-% include "Syntax.hs" %-} 29 | 30 | -------------------------------------------------------------------------------- 31 | -- * partition lets into recursive and non-recursive parts 32 | 33 | data Let 34 | = Let1 LDefinE 35 | | LetRec (List LDefinE) 36 | deriving Show 37 | 38 | isRecursiveDefin :: DefinE -> Bool 39 | isRecursiveDefin def = case def of { Defin name rhs -> trieMember name (exprFreeVars rhs) } 40 | 41 | mkLet :: List LDefinE -> Let 42 | mkLet list = case mbSingleton list of 43 | { Nothing -> LetRec list 44 | ; Just def -> ifte (isRecursiveDefin (located def)) (LetRec (singleton def)) (Let1 def) } 45 | 46 | checkForDuplicates :: List LDefinE -> a -> a 47 | checkForDuplicates defins what = case trieFromListUnique duplicate (map (compose definToPair located) defins) of 48 | { Node _ _ -> what } where { duplicate n = concat [ "multiple declaration of " , quoteString n ] } 49 | 50 | -- debug "graph" (trieToList graph) (debug "SCC" sccs ( 51 | partitionLets :: List LDefinE -> List Let 52 | partitionLets ldefins = map (compose mkLet (map lkp)) sccs where 53 | { names = map (compose definedName located) ldefins 54 | ; isName n = stringElem n names 55 | ; graph = trieFromList (for ldefins (\ldef -> case ldef of { Located loc def -> case def of { Defin name rhs -> 56 | Pair name (filter isName (trieSetToList (exprFreeVars rhs))) }} )) 57 | ; sccs = dependencyAnalysis (checkForDuplicates ldefins graph) 58 | ; defTrie = trieFromList (map ldefinToPair ldefins) 59 | ; lkp n = case trieLookup n defTrie of { Just locy -> case locy of { Located loc y -> Located loc (Defin n y) } 60 | ; Nothing -> error "partitionLets: shouldn't happen" } } 61 | 62 | -- | Top-level everything is letrec, but we still do the reordering. 63 | reorderProgram :: List (LDefin Expr) -> Program Expr 64 | reorderProgram list = worker (checkRecursiveLets (reorderLets (RecE list MainE))) where 65 | { worker expr = case expr of 66 | { LetE defs body -> append (map NonRecursive defs) (worker body) 67 | ; RecE defs body -> Cons (Recursive defs) (worker body) 68 | ; MainE -> Nil }} 69 | 70 | -- | check for \"lambdas only\" condition in letrecs 71 | checkRecursiveLets :: Expr -> Expr 72 | checkRecursiveLets expr = go expr where 73 | { go expr = case expr of 74 | { VarE _ -> expr 75 | ; AppE e1 e2 -> AppE (go e1) (go e2) 76 | ; LamE v body -> LamE v (go body) 77 | ; LetE defs body -> LetE (map goDefin defs) (go body) 78 | ; RecE defs body -> let { bad = filter (\ldef -> not (isLambdaExpr (definedWhat (located ldef)))) defs } 79 | in case bad of 80 | { Nil -> RecE (map goDefin defs) (go body) 81 | ; _ -> let { badwhat = map showNameAndLoc bad ; text = intercalate ", " badwhat } 82 | in (error (append "recursive definitions must be lambdas: " text)) } 83 | ; CaseE what brs -> CaseE what (map goBranch brs) 84 | ; LitE _ -> expr 85 | ; ListE list -> ListE (map go list) 86 | ; PrimE p list -> PrimE p (map go list) 87 | ; StrE _ -> expr 88 | ; MainE -> expr } 89 | ; goDefin ldefin = case ldefin of { Located loc defin -> case defin of 90 | { Defin name rhs -> Located loc (Defin name (go rhs)) }} 91 | ; goBranch branch = case branch of 92 | { DefaultE rhs -> DefaultE (go rhs) 93 | ; BranchE con args rhs -> BranchE con args (go rhs) } } 94 | 95 | reorderLets :: Expr -> Expr 96 | reorderLets expr = go expr where 97 | { go expr = case expr of 98 | { VarE _ -> expr 99 | ; LitE _ -> expr 100 | ; StrE _ -> expr 101 | ; AppE e1 e2 -> AppE (go e1) (go e2) 102 | ; LamE n body -> LamE n (go body) 103 | ; LetE defs e -> LetE (map goDefin defs) (go e) 104 | ; RecE defs e -> let { ps = partitionLets (map goDefin defs) } in letWorker ps (go e) 105 | ; CaseE e brs -> CaseE (lfmap go e) (map goBranch brs) 106 | ; ListE es -> ListE (map go es) 107 | ; PrimE p es -> PrimE p (map go es) 108 | ; MainE -> MainE } 109 | ; goDefin ldef = lfmap (\def -> case def of { Defin n rhs -> Defin n (go rhs) }) ldef 110 | ; goBranch branch = case branch of 111 | { DefaultE rhs -> DefaultE (go rhs) 112 | ; BranchE con args rhs -> BranchE con args (go rhs) } 113 | ; letWorker ls body = case ls of { Nil -> body ; Cons this rest -> case this of 114 | { Let1 def1 -> LetE (singleton def1) (letWorker rest body) 115 | ; LetRec defs -> RecE defs (letWorker rest body) }} } 116 | 117 | -------------------------------------------------------------------------------- 118 | -- * Dependency graphs 119 | 120 | type Vtx = Name 121 | 122 | type Graph = Trie (List Vtx) 123 | 124 | graphToList :: Graph -> List (Pair Vtx Vtx) 125 | graphToList g = concat (map worker (trieToList g)) where 126 | { worker pair = case pair of { Pair v ws -> map (\w -> Pair v w) ws } } 127 | 128 | graphFromList :: List (Pair Vtx Vtx) -> Graph 129 | graphFromList = trieBuild singleton Cons 130 | 131 | -- | NB: if we naively flip by @graphFromList . map swap . graphToList@, we will 132 | -- lose vertices with no inbound edges! So we have to reinsert those 133 | flipGraph :: Graph -> Graph 134 | flipGraph graph = insertKeys (graphFromList (map swap (graphToList graph))) where 135 | { keys = trieKeys graph 136 | ; insertKeys g = foldl (\old k -> trieAlter h k old) g keys 137 | ; h mb = case mb of { Nothing -> Just Nil ; Just _ -> mb } } 138 | 139 | dependencyAnalysis :: Graph -> List (List Vtx) 140 | dependencyAnalysis g = tarjanSCC (flipGraph g) 141 | 142 | -------------------------------------------------------------------------------- 143 | -- * Tarjan's topologically sorted SCC algorithm 144 | 145 | -- | Note: the table @links@ consists of @(index,lowlink)@ pairs 146 | data Tarjan = Tarjan 147 | { _next :: Int 148 | , _stack :: List Vtx 149 | , _links :: Trie (Pair Int Int) 150 | , _output :: List (List Vtx) 151 | } 152 | deriving Show 153 | 154 | -- | Based on 155 | tarjanSCC :: Graph -> (List (List Vtx)) 156 | tarjanSCC graph = case execState (smapM_ worker vtxSet) iniState of { Tarjan _ _ _ out -> out } where 157 | { iniState = Tarjan 0 [] trieEmpty [] 158 | ; vtxSet = trieKeys graph 159 | ; worker v = sbind sget (\state -> case state of { Tarjan next stack links out -> 160 | case trieLookup v links of { Nothing -> scc v ; _ -> sreturn Unit }}) 161 | -- scc :: Vtx -> State Tarjan Unit 162 | ; scc v = sbind sget (\state -> case state of { Tarjan next stack links out -> 163 | sseq3 (sput (Tarjan (inc next) (Cons v stack) (trieInsert v (Pair next next) links) out)) 164 | (successor v) (popAndOut v) }) 165 | -- successor :: Vtx -> State Tarjan Unit 166 | ; successor v = sforM_ (trieLookupDefault Nil v graph) (\w -> 167 | sbind sget (\state -> case state of { Tarjan next stack links output -> 168 | case trieLookup w links of 169 | { Nothing -> sseq (scc w) 170 | (sbind sget (\state -> case state of { Tarjan next stack links output -> 171 | case trieLookup v links of { Just vpair -> case vpair of { Pair vi vl -> 172 | case trieLookup w links of { Just wpair -> case wpair of { Pair _ wl -> 173 | sput (Tarjan next stack (trieInsert v (Pair vi (min vl wl)) links) output) }}}}})) 174 | ; Just pair -> case pair of { Pair wi wl -> swhen (stringElem w stack) 175 | (case trieLookup v links of { Just vpair -> case vpair of { Pair vi vl -> 176 | case trieLookup w links of { Just wpair -> case wpair of { Pair wi _ -> 177 | sput (Tarjan next stack (trieInsert v (Pair vi (min vl wi)) links) output) }}}})} }})) 178 | -- popAndOut :: Vtx -> State Tarjan Unit 179 | ; popAndOut v = sbind sget (\state -> case state of { Tarjan next stack links output -> 180 | case trieLookup v links of { Just vpair -> case vpair of { Pair vi vl -> swhen (eq vi vl) 181 | (let { this = takeWhile (\x -> not (stringEq x v)) stack 182 | ; stack' = tail (dropWhile (\x -> not (stringEq x v)) stack) 183 | ; output' = Cons (Cons v this) output } 184 | in sput (Tarjan next stack' links output') )}}}) } 185 | 186 | -------------------------------------------------------------------------------- 187 | 188 | -------------------------------------------------------------------------------- /Eval.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | The evaluator (interpreter). 3 | -- 4 | -- Note: this is not needed for compilation, it's simply provided as an extra. 5 | -- 6 | 7 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 8 | {-# LANGUAGE Strict #-} 9 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 10 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 11 | 12 | module Eval where 13 | 14 | -------------------------------------------------------------------------------- 15 | 16 | import Prelude ( Int , Char , Eq , Show ) 17 | import PrimGHC 18 | 19 | -------------------------------------------------------------------------------- 20 | 21 | import Base 22 | import Containers 23 | import Types 24 | import PrimOps 25 | import DataCon 26 | import Core 27 | 28 | {-% include "Base.hs" %-} 29 | {-% include "Types.hs" %-} 30 | {-% include "Containers.hs" %-} 31 | {-% include "PrimOps.hs" %-} 32 | {-% include "DataCon.hs" %-} 33 | {-% include "Core.hs" %-} 34 | 35 | -------------------------------------------------------------------------------- 36 | -- ** Runtime values 37 | 38 | -- | Note: for recursive lets, we need some delaying mechanism, because 39 | -- this is a strict language, so we don't have cyclic data structures. 40 | -- 41 | -- The idea is that in case of a letrec, the first two argument of LamV 42 | -- store the recursive part, and the third stores the rest of the environment. 43 | data Value 44 | = IntV Int 45 | | ChrV Char 46 | | HdlV Handle 47 | | ConV Con (List Value) 48 | | LamV (Value -> EvalM Value) 49 | | IO_V Action 50 | 51 | -- | This is a \"newtype\" only because the two layers of IO was really confusing... 52 | data Action = Action (IO Value) 53 | 54 | runAction :: Action -> IO Value 55 | runAction action = case action of { Action m -> m } 56 | 57 | showValue_ :: Value -> String 58 | showValue_ value = showValue Nil value 59 | 60 | showValue' :: StaticEnv -> Value -> String 61 | showValue' statenv val = case statenv of { StaticEnv dconNames _ _ -> showValue dconNames val } 62 | 63 | type DConNames = Map Int String 64 | 65 | showValue :: DConNames -> Value -> String 66 | showValue conNames value = case value of 67 | { IntV n -> showInt n 68 | ; ChrV c -> quoteChar c 69 | ; HdlV _ -> "" 70 | ; ConV con args -> 71 | let { tag = case mapLookup con conNames of { Just name -> name ; Nothing -> appendInt "Con#" con } } 72 | in case args of { Nil -> tag ; Cons _ _ -> parenString (unwords (Cons tag (map (showValue conNames) args))) } 73 | ; LamV _ -> "" 74 | ; IO_V _ -> "" 75 | } 76 | 77 | printValue :: DConNames -> Value -> IO Unit 78 | printValue conNames x = putStrLn (showValue conNames x) 79 | 80 | eqValue :: Value -> Value -> Bool 81 | eqValue val1 val2 = case val1 of 82 | { IntV i1 -> case val2 of { IntV i2 -> eq i1 i2 ; _ -> False } 83 | ; ChrV c1 -> case val2 of { ChrV c2 -> ceq c1 c2 ; _ -> False } 84 | -- ; HdlV h1 -> case val2 of { HdlV h2 -> heq h1 h2 ; _ -> False } 85 | ; ConV con1 args1 -> case val2 of 86 | { ConV con2 args2 -> and3 (eq con1 con2) (eq (length args1) (length args2)) 87 | (andList (zipWith eqValue args1 args2)) 88 | ; _ -> False } 89 | ; LamV _ -> False 90 | ; IO_V _ -> False 91 | } 92 | 93 | -------------------------------------------------------------------------------- 94 | -- ** Marshalling values 95 | 96 | boolToValue :: Bool -> Value 97 | boolToValue b = ifte b (ConV con_True Nil) (ConV con_False Nil) 98 | 99 | valueToBool :: Value -> Bool 100 | valueToBool val = case val of { ConV con args -> eq con con_True ; _ -> error "marshalling: not a boolean" } 101 | 102 | intToValue :: Int -> Value 103 | intToValue = IntV 104 | 105 | valueToInt :: Value -> Int 106 | valueToInt val = case val of { IntV x -> x ; _ -> error "marshalling: not an integer" } 107 | 108 | charToValue :: Char -> Value 109 | charToValue = ChrV 110 | 111 | maybeCharToValue :: Maybe Char -> Value 112 | maybeCharToValue mb = case mb of { Nothing -> ConV con_Nothing Nil ; Just c -> ConV con_Just (singleton (ChrV c)) } 113 | 114 | valueToChar :: Value -> Char 115 | valueToChar val = case val of { ChrV c -> c ; _ -> error "marshalling: not a character" } 116 | 117 | unitToValue :: Unit -> Value 118 | unitToValue Unit = ConV con_Unit Nil 119 | 120 | valueToUnit :: Value -> Unit 121 | valueToUnit val = case val of { ConV con _ -> ifte (eq con con_Unit) Unit err ; _ -> err } where 122 | { err = error "marshalling: not a unit" } 123 | 124 | handleToValue :: Handle -> Value 125 | handleToValue = HdlV 126 | 127 | valueToHandle :: Value -> Handle 128 | valueToHandle val = case val of { HdlV h -> h ; _ -> error "marshalling: not a handle" } 129 | 130 | valueToIOMode :: Value -> IOMode 131 | valueToIOMode value = case value of 132 | { ConV con args -> ifte (eq con con_ReadMode ) ReadMode ( 133 | ifte (eq con con_WriteMode ) WriteMode ( 134 | ifte (eq con con_AppendMode ) AppendMode ( 135 | ifte (eq con con_ReadWriteMode ) ReadWriteMode (err value)))) 136 | ; _ -> err value } where { err v = error (append "marshalling: not an IO mode: " (quoteString (showValue_ v))) } 137 | 138 | listToValue :: List Value -> Value 139 | listToValue list = case list of { Nil -> ConV con_Nil Nil 140 | ; Cons x xs -> ConV con_Cons [ x , listToValue xs ] } 141 | 142 | valueToList :: Value -> List Value 143 | valueToList value = case value of 144 | { ConV con args -> ifte (neq con con_Cons) Nil 145 | (case mbPair args of { Nothing -> Nil ; Just pair -> case pair of 146 | { Pair u v -> Cons u (valueToList v) } } ) } 147 | 148 | stringToValue :: String -> Value 149 | stringToValue = compose listToValue (map charToValue) 150 | 151 | valueToString :: Value -> String 152 | valueToString = compose (map valueToChar) valueToList 153 | 154 | maybeStringToValue :: Maybe String -> Value 155 | maybeStringToValue mb = case mb of { Nothing -> ConV con_Nothing Nil ; Just s -> ConV con_Just (singleton (stringToValue s)) } 156 | 157 | literalToValue :: Literal -> Value 158 | literalToValue lit = case lit of 159 | { IntL k -> IntV k 160 | ; ChrL c -> ChrV c 161 | ; StrL s -> stringToValue s } 162 | 163 | -------------------------------------------------------------------------------- 164 | -- ** The environment 165 | 166 | -- | The environment. We need some hacking to handle letrec blocks - as we cannot have cyclic data structure, we have to 167 | -- do something else. So we just simulate the stack allocation structure. 168 | data Env 169 | = NilEnv 170 | | ConsEnv Value Env 171 | | RecEnv Int (List Term) Env 172 | 173 | indexEnv :: StaticEnv -> Int -> Env -> Value 174 | indexEnv statEnv k env = go k env where 175 | { go k env = case env of 176 | { NilEnv -> error "indexEnv: indexing out of bounds" 177 | ; ConsEnv val rest -> ifte (eq k 0) val (go (dec k ) rest) 178 | ; RecEnv n tms rest -> ifte (lt k n) (mkLamV statEnv env (unLam (index k tms))) (go (minus k n) rest) } 179 | ; unLam tm = case tm of { LamT nt -> forgetName nt ; _ -> error "indexEnv: fatal: recursive binding must be lambda" } } 180 | 181 | lengthEnv :: Env -> Int 182 | lengthEnv env = case env of { NilEnv -> 0 ; ConsEnv _ rest -> inc (lengthEnv rest) ; RecEnv n _ rest -> plus n (lengthEnv rest) } 183 | 184 | -- -- | for debugging only 185 | -- showEnv :: Env -> String 186 | -- showEnv env = Cons '[' (go env) where { go env = case env of 187 | -- { NilEnv -> "]" 188 | -- ; ConsEnv v rest -> append3 (showValue_ v) " : " (go rest) 189 | -- ; RecEnv _ tms rest -> append3 (showTermList tms) " : " (go rest) }} 190 | -- 191 | -- -- | for debugging only 192 | -- printEnv :: Env -> IO Unit 193 | -- printEnv env = go 0 env where { go k env = case env of 194 | -- { NilEnv -> ioret_ 195 | -- ; ConsEnv v rest -> ioseq (putStrLn (append3 (showInt k) " => " (showValue_ v))) (go (inc k) rest) 196 | -- ; RecEnv n tms rest -> ioseq (putStrLn (append3 (showInt k) " => " (showTermList tms))) (go (plus k n) rest) }} 197 | 198 | -------------------------------------------------------------------------------- 199 | -- ** The evaluator 200 | 201 | -- | The static execution environment: data constructors, string constants, index of @main@ 202 | data StaticEnv = StaticEnv 203 | { _dcons :: Map Int String 204 | , _strLits :: List String 205 | , _mainIdx :: TopIdx 206 | } 207 | deriving Show 208 | 209 | pushEnv1 :: Value -> Env -> Env 210 | pushEnv1 value env = ConsEnv value env 211 | 212 | pushEnvMany :: List Value -> Env -> Env 213 | pushEnvMany values env = case values of { Nil -> env ; Cons v vs -> pushEnvMany vs (pushEnv1 v env) } 214 | 215 | pushEnvRec :: Int -> List Term -> Env -> Env 216 | pushEnvRec n tms env = RecEnv n tms env 217 | 218 | evalVar :: StaticEnv -> Env -> Var -> Value 219 | evalVar statEnv env var = case var of 220 | { IdxV idx -> indexEnv statEnv idx env 221 | ; StrV k -> case statEnv of { StaticEnv _dcons strlits _main -> stringToValue (index k strlits) } 222 | ; TopV _ -> error "evalVar: top-level references shouldn't appear in terms" 223 | ; LevV _ -> error "evalVar: de Bruijn levels shouldn't appear in terms" } 224 | 225 | evalAtom :: StaticEnv -> Env -> Atom -> Value 226 | evalAtom statEnv env atom = case atom of 227 | { VarA var -> evalVar statEnv env (forgetName var) 228 | ; KstA lit -> literalToValue lit 229 | ; ConA con -> ConV (forgetName con) Nil } 230 | 231 | mkLamV :: StaticEnv -> Env -> Term -> Value 232 | mkLamV statEnv env tm = LamV (\x -> eval statEnv (pushEnv1 x env) tm) 233 | 234 | -- eval :: StaticEnv -> Env -> Term -> IO Value 235 | -- eval statEnv env term = 236 | -- ioseq (iosequence_ 237 | -- [ putStrLn (append ">> env = " (showEnv env)) 238 | -- , putStrLn (append ">> term = " (showTerm term)) 239 | -- , putStrLn "" 240 | -- ]) 241 | -- (eval' statEnv env term) 242 | 243 | type EvalM a = IO a 244 | 245 | eval :: StaticEnv -> Env -> Term -> EvalM Value 246 | eval statEnv env term = case term of 247 | { AtmT atom -> ioreturn (evalAtom statEnv env atom) 248 | ; AppT e1 a2 -> iobind (eval statEnv env e1) (\val1 -> case val1 of 249 | { ConV con args -> ioreturn (ConV con (snoc args (evalAtom statEnv env a2))) 250 | ; LamV fun -> let { val2 = evalAtom statEnv env a2 } in fun val2 251 | ; _ -> error "eval: application to non-lambda" }) 252 | ; PriT primop vs -> case primop of { PrimOp _arity prim -> case statEnv of 253 | { StaticEnv dconNames _ _ -> evalPrimM dconNames prim (map (evalAtom statEnv env) vs) }} 254 | ; LZPT primop ts -> case primop of { PrimOp _arity prim -> case prim of 255 | { IFTE -> ternary ts (lazyIFTE statEnv env) 256 | ; And -> binary ts (lazyAnd statEnv env) 257 | ; Or -> binary ts (lazyOr statEnv env) 258 | ; _ -> error "eval: unrecognized lazy primop" }} 259 | ; LetT nt1 t2 -> iobind (eval statEnv env (forgetName nt1)) (\x -> eval statEnv (pushEnv1 x env) t2) 260 | ; LamT body -> ioreturn (mkLamV statEnv env (forgetName body)) 261 | ; CasT latom brs -> let { scrutinee = evalAtom statEnv env (located latom) } in case scrutinee of 262 | { ConV con args -> matchCon statEnv env con args brs 263 | ; _ -> error (concat [ "eval: branching on a non-constructor at " , showLocation (location latom), "; scrutinee = " , showValue' statEnv scrutinee ]) } 264 | ; RecT n nts tm -> let { env' = pushEnvRec n (map forgetName nts) env } in eval statEnv env' tm 265 | ; MainT -> case statEnv of { StaticEnv _ _ mainidx -> 266 | let { k = minus (lengthEnv env) (inc mainidx) } 267 | in (ioreturn (indexEnv statEnv k env)) } 268 | } 269 | 270 | matchCon :: StaticEnv -> Env -> Con -> List Value -> List BranchT -> EvalM Value 271 | matchCon statEnv env con args brs = go brs where 272 | { nargs = length args 273 | ; go branches = case branches of 274 | { Nil -> error "non-exhaustive patterns in case" 275 | ; Cons this rest -> case this of 276 | { DefaultT rhs -> eval statEnv env rhs 277 | ; BranchT bcon bnargs rhs -> ifte (and (eq con (forgetName bcon)) (eq nargs bnargs)) 278 | (eval statEnv (pushEnvMany args env) rhs) 279 | (go rest) } } } 280 | 281 | -------------------------------------------------------------------------------- 282 | -- ** Evaluating pure primops 283 | 284 | evalfunII :: (Int -> Int) -> Value -> Value 285 | evalfunII f v1 = intToValue (f (valueToInt v1)) 286 | 287 | evalfunIII :: (Int -> Int -> Int) -> Value -> Value -> Value 288 | evalfunIII f v1 v2 = intToValue (f (valueToInt v1) (valueToInt v2)) 289 | 290 | evalfunIIB :: (Int -> Int -> Bool) -> Value -> Value -> Value 291 | evalfunIIB f v1 v2 = boolToValue (f (valueToInt v1) (valueToInt v2)) 292 | 293 | evalfunBB :: (Bool -> Bool) -> Value -> Value 294 | evalfunBB f v1 = boolToValue (f (valueToBool v1)) 295 | 296 | evalfunBBB :: (Bool -> Bool -> Bool) -> Value -> Value -> Value 297 | evalfunBBB f v1 v2 = boolToValue (f (valueToBool v1) (valueToBool v2)) 298 | 299 | evalPrimPure :: Prim -> List Value -> Value 300 | evalPrimPure prim args = case prim of 301 | { Error -> unary args (compose error valueToString) 302 | ; Negate -> unary args (evalfunII negate) 303 | ; Plus -> binary args (evalfunIII plus ) 304 | ; Minus -> binary args (evalfunIII minus ) 305 | ; Times -> binary args (evalfunIII times ) 306 | ; Div -> binary args (evalfunIII div ) 307 | ; Mod -> binary args (evalfunIII mod ) 308 | ; BitAnd -> binary args (evalfunIII bitAnd) 309 | ; BitOr -> binary args (evalfunIII bitOr ) 310 | ; BitXor -> binary args (evalfunIII bitXor ) 311 | ; ShiftL -> binary args (evalfunIII shiftL) 312 | ; ShiftR -> binary args (evalfunIII shiftR) 313 | ; Chr -> unary args (compose3 charToValue chr valueToInt ) 314 | ; Ord -> unary args (compose3 intToValue ord valueToChar) 315 | ; Not -> unary args (evalfunBB not ) 316 | ; IntEQ -> binary args (evalfunIIB eq ) 317 | ; IntLT -> binary args (evalfunIIB lt ) 318 | ; IntLE -> binary args (evalfunIIB le ) 319 | ; StdIn -> nullary args (handleToValue stdin ) 320 | ; StdOut -> nullary args (handleToValue stdout) 321 | ; StdErr -> nullary args (handleToValue stderr) 322 | ; _ -> error (append "evalPrim: unimplemented primop: " (quoteString (showPrim prim))) 323 | } 324 | -- ; GenEQ -> binary args (\x y -> boolToValue (eqValue x y)) 325 | -- ; IFTE -> error "ifte: this has to be implemented somewhere else because of lazyness" 326 | -- ; And -> binary args (evalfunBBB and ) 327 | -- ; Or -> binary args (evalfunBBB or ) 328 | 329 | -------------------------------------------------------------------------------- 330 | -- ** Evaluating IO primops 331 | 332 | ioCompose1 :: (c -> d) -> (b -> IO c) -> (a -> b) -> a -> IO d 333 | ioCompose1 post action pre = \x -> iofmap post (action (pre x)) 334 | 335 | ioCompose2 :: (c -> d) -> (b1 -> b2 -> IO c) -> (a1 -> b1) -> (a2 -> b2) -> a1 -> a2 -> IO d 336 | ioCompose2 post action pre1 pre2 = \x y -> iofmap post (action (pre1 x) (pre2 y)) 337 | 338 | -- | Note: since the evaluator is in the IO monad, since it can evaluate 339 | -- side effects, this also results in @(Value -> EvalM Value)@ instead 340 | -- of @(Value -> Value)@ 341 | valueToLambda :: Value -> (Value -> EvalM Value) 342 | valueToLambda val = case val of { LamV fun -> fun ; _ -> error "marshalling: not a lambda" } 343 | 344 | actionToValue :: Action -> Value 345 | actionToValue action = IO_V action 346 | 347 | valueToAction :: Value -> Action 348 | valueToAction val = case val of { IO_V action -> action ; _ -> error "marshalling: not an IO action" } 349 | 350 | actionJoin :: IO Action -> Action 351 | actionJoin io = Action (iojoin (iofmap runAction io)) 352 | 353 | bindAction :: Action -> (Value -> Action) -> Action 354 | bindAction act1 fun2 = Action (iobind (runAction act1) (compose runAction fun2)) 355 | 356 | -- | Assuming the values are the right shape for @iobind@, we bind them 357 | ioBindValue :: Value -> Value -> Value 358 | ioBindValue actionVal funVal = actionToValue (bindAction a b) where 359 | { a = valueToAction actionVal 360 | ; b = \val -> actionJoin (iofmap valueToAction (valueToLambda funVal val)) } 361 | 362 | -- | Note: This has to be The environment is only used for @IOBind@, which takes a lambda as second argument. 363 | evalPrimM :: DConNames -> Prim -> List Value -> EvalM Value 364 | evalPrimM dconNames prim args = case prim of 365 | { RunIO -> runAction (unary args valueToAction) 366 | ; _ -> ioreturn (evalPrimPureIO dconNames prim args) } 367 | 368 | evalPrimPureIO :: DConNames -> Prim -> List Value -> Value 369 | evalPrimPureIO dconNames prim args = case prim of 370 | { GetChar -> mkIO (nullary args (iofmap maybeCharToValue getChar ) ) 371 | ; GetArg -> mkIO (unary args (ioCompose1 maybeStringToValue getArg (compose inc2 valueToInt)) ) 372 | ; PutChar -> mkIO (unary args (ioCompose1 unitToValue putChar valueToChar ) ) 373 | ; Exit -> mkIO (unary args (ioCompose1 unitToValue exit valueToInt ) ) 374 | ; OpenFile -> mkIO (binary args (ioCompose2 handleToValue openFile valueToString valueToIOMode) ) 375 | ; HClose -> mkIO (unary args (ioCompose1 unitToValue hClose valueToHandle) ) 376 | ; HGetChar -> mkIO (unary args (ioCompose1 maybeCharToValue hGetChar valueToHandle) ) 377 | ; HPutChar -> mkIO (binary args (ioCompose2 unitToValue hPutChar valueToHandle valueToChar ) ) 378 | ; HPutStr -> mkIO (binary args (ioCompose2 unitToValue hPutStr valueToHandle valueToString) ) 379 | ; Print -> mkIO (unary args (ioCompose1 unitToValue putStrLn (showValue dconNames)) ) 380 | ; IOReturn -> mkIO (unary args ioreturn) 381 | ; IOBind -> binary args ioBindValue 382 | ; _ -> evalPrimPure prim args 383 | } where { mkIO ioaction = actionToValue (Action ioaction) } 384 | 385 | -------------------------------------------------------------------------------- 386 | -- *** Lazy primops 387 | 388 | lazyIFTE :: StaticEnv -> Env -> Term -> Term -> Term -> IO Value 389 | lazyIFTE statEnv env tb tx ty = iobind (eval statEnv env tb) (\vb -> 390 | ifte (valueToBool vb) (eval statEnv env tx) (eval statEnv env ty)) 391 | 392 | lazyAnd :: StaticEnv -> Env -> Term -> Term -> IO Value 393 | lazyAnd statEnv env t1 t2 = iobind (eval statEnv env t1) (\v1 -> 394 | ifte (valueToBool v1) (eval statEnv env t2) (ioreturn (boolToValue False))) 395 | 396 | lazyOr :: StaticEnv -> Env -> Term -> Term -> IO Value 397 | lazyOr statEnv env t1 t2 = iobind (eval statEnv env t1) (\v1 -> 398 | ifte (valueToBool v1) (ioreturn (boolToValue True)) (eval statEnv env t2)) 399 | 400 | -------------------------------------------------------------------------------- 401 | 402 | -------------------------------------------------------------------------------- /Inliner.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Inlining small functions + some basic optimizations 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module Inliner where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | 18 | import Base 19 | import Containers 20 | import Types 21 | import Core 22 | 23 | {-% include "Base.hs" %-} 24 | {-% include "Containers.hs" %-} 25 | {-% include "Types.hs" %-} 26 | {-% include "Core.hs" %-} 27 | 28 | -------------------------------------------------------------------------------- 29 | -- ** Inliner 30 | 31 | data InlineTm 32 | = NoInline 33 | | Inline Level Term 34 | deriving Show 35 | 36 | type InlineEnv = List InlineTm 37 | 38 | -- | temp debugging 39 | data TermSize = TermSize Int Int deriving Show 40 | 41 | -- | NB: we should never inline stuff which does computation! 42 | isInlineableTerm :: Term -> Bool 43 | isInlineableTerm term = case term of 44 | { LamT _ -> True 45 | ; AtmT _ -> True 46 | ; LetT _ t -> isInlineableTerm t 47 | ; _ -> False } 48 | 49 | insertIfSmall :: Size -> Level -> Term -> InlineEnv -> InlineEnv 50 | insertIfSmall limit level term scope = case isInlineableTerm term of 51 | { True -> let { this = ifte (gt (termSize term) limit) NoInline (Inline level term) } in (Cons this scope) 52 | ; False -> Cons NoInline scope } 53 | 54 | addNoInlines :: Int -> InlineEnv -> InlineEnv 55 | addNoInlines n env = ifte (le n 0) env (addNoInlines (dec n) (Cons NoInline env)) 56 | 57 | optimizeCorePrg :: CoreProgram -> CoreProgram 58 | optimizeCorePrg prg = let { limit = 24 } in 59 | (inlineCorePrg limit (inlineCorePrg limit 60 | (inlineCorePrg limit (inlineCorePrg limit 61 | (inlineCorePrg limit (inlineCorePrg limit prg)))))) 62 | 63 | inlineCorePrg :: Size -> CoreProgram -> CoreProgram 64 | inlineCorePrg sizeLimit coreprg = case coreprg of 65 | { CorePrg blocks mainIdx mainTerm -> let 66 | { bigterm' = removeUnusedLets mainIdx (inlineLets sizeLimit (programToTerm blocks)) 67 | ; mainIdx' = findToplevelMain bigterm' 68 | ; blocks' = termToProgram bigterm' 69 | ; mainTerm' = AtmT (VarA (Named "$main" (TopV mainIdx'))) 70 | } in CorePrg blocks' mainIdx' mainTerm' } 71 | 72 | inlineLets :: Size -> Term -> Term 73 | inlineLets sizeLimit term = go 0 Nil term where 74 | { goIdx name level env i = let { what = index i env } in case what of 75 | { NoInline -> AtmT (VarA (Named name (IdxV i))) 76 | ; Inline ilevel iterm -> shiftTerm ilevel level iterm } 77 | ; goAtom level env atom = case atom of { VarA nvar -> case nvar of { Named name var -> case var of 78 | { LevV j -> goIdx name level env (minus level (inc j)) 79 | ; IdxV i -> goIdx name level env i 80 | ; _ -> AtmT atom } } ; _ -> AtmT atom } 81 | ; go level env term = case term of 82 | { AtmT atom -> goAtom level env atom 83 | ; LamT nt -> LamT (nfmap (go (inc level) (Cons NoInline env)) nt) 84 | ; AppT t a -> AppT (go level env t) a 85 | ; PriT p as -> PriT p as 86 | ; LZPT p ts -> LZPT p (map (go level env) ts) 87 | -- ; LetT nt1 t2 -> let { nt1' = (nfmap (go level env) nt1) 88 | ; LetT nt1 t2 -> let { nt1' = nfmap (\t -> betaReduceTerm level (go level env t)) nt1 89 | ; env' = insertIfSmall sizeLimit level (forgetName nt1') env } 90 | in LetT nt1' (go (inc level) env' t2) 91 | ; RecT n nts t -> let { level' = plus level n ; env' = addNoInlines n env } 92 | in RecT n (map (nfmap (go level' env')) nts) (go level' env' t) 93 | ; CasT la brs -> CasT la (map (goBranch level env) brs) 94 | ; MainT -> MainT } 95 | ; goBranch level env branch = case branch of 96 | { BranchT c n t -> BranchT c n (go (plus level n) (addNoInlines n env) t) 97 | ; DefaultT t -> DefaultT (go level env t) } } 98 | 99 | -------------------------------------------------------------------------------- 100 | 101 | -- ex1 = LamT (Named "x" ( 102 | -- LetT (Named "p" (AtmT (KstA (IntL 666)))) 103 | -- (PriT (PrimOp 2 Plus) [ VarA (Named "p" (IdxV 0)) , KstA (IntL 1) ] ))) 104 | -- 105 | -- ex2 = LamT (Named "x" ( 106 | -- LetT (Named "p" (AtmT (KstA (IntL 666)))) 107 | -- (PriT (PrimOp 2 Plus) [ VarA (Named "x" (IdxV 1)) , KstA (IntL 1) ] ))) 108 | 109 | -------------------------------------------------------------------------------- 110 | 111 | -- | NB: we have to know the @main@, otherwise we would eliminate /everything/ 112 | removeUnusedLets :: TopIdx -> Term -> Term 113 | removeUnusedLets mainIdx term = snd (go 0 term) where 114 | { 115 | -- goLev :: Level -> Name -> Level -> Pair IntSet (Named Var) 116 | goLev level name j = Pair (setSingleton j) (Named name (LevV j)) 117 | -- goAtom :: Level -> Atom -> Pair IntSet Atom 118 | ; goAtom level atom = case atom of { VarA nvar -> case nvar of { Named name var -> case var of 119 | { LevV j -> second VarA (goLev level name j) 120 | ; IdxV i -> second VarA (goLev level name (minus level (inc i))) 121 | ; _ -> Pair setEmpty atom } } ; _ -> Pair setEmpty atom } 122 | -- go :: Level -> Term -> Pair IntSet Term 123 | ; go level term = 124 | -- debug "level" level (debug "term" term (debug_ "term'" ( 125 | case term of 126 | { AtmT atom -> second AtmT (goAtom level atom) 127 | ; LamT nt -> case go (inc level) (forgetName nt) of { Pair set t' -> 128 | Pair (setDelete level set) (LamT (Named (nameOf nt) t')) } 129 | ; AppT t a -> case go level t of { Pair set1 t' -> 130 | case goAtom level a of { Pair set2 a' -> Pair (setUnion set1 set2) (AppT t' a') }} 131 | ; PriT p as -> let { pairs = map (goAtom level) as } in Pair (setUnions (map fst pairs)) (PriT p (map snd pairs)) 132 | ; LZPT p ts -> let { pairs = map (go level) ts } in Pair (setUnions (map fst pairs)) (LZPT p (map snd pairs)) 133 | ; LetT nt1 t2 -> case nt1 of { Named name t1 -> case go level t1 of { Pair free1 t1' -> 134 | case go (inc level) t2 of { Pair free2 t2' -> ifte (setMember level free2) 135 | (Pair (setUnion free1 (setDelete level free2)) (LetT (Named name t1') t2')) 136 | (Pair free2 (shiftTerm (inc level) level t2')) }}} 137 | ; RecT n nts tm -> let { level' = plus level n ; levels = rangeFrom level n 138 | ; pairs = map (\nt -> case nt of { Named named t -> case go level' t of 139 | { Pair set t' -> Pair set (Named named t') }}) nts } 140 | in case go level' tm of { Pair free2 tm' -> ifte (setIsDisjoint levels free2) 141 | (Pair free2 (shiftTerm level' level tm')) 142 | (Pair (setUnion free2 (setDeleteMany levels (setUnions (map fst pairs)))) 143 | (RecT n (map snd pairs) tm')) } 144 | ; CasT la brs -> case la of { Located loc a -> case goAtom level a of { Pair set1 a' -> 145 | let { pairs = map (goBranch level) brs } 146 | in Pair (setUnion set1 (setUnions (map fst pairs))) (CasT (Located loc a') (map snd pairs)) }} 147 | ; MainT -> Pair (setSingleton mainIdx) MainT } 148 | -- (AtmT (VarA (Named "$main" (LevV mainIdx)))) } ))) 149 | -- goBranch :: Level -> BranchT -> Pair IntSet BranchT 150 | ; goBranch level branch = case branch of 151 | { DefaultT rhs -> case go level rhs of { Pair set rhs' -> Pair set (DefaultT rhs') } 152 | ; BranchT c n rhs -> let { level' = plus level n } in case go level' rhs of { Pair set rhs' -> 153 | let { set' = setDeleteMany (rangeFrom level n) set } in Pair set' (BranchT c n rhs') }} } 154 | 155 | -------------------------------------------------------------------------------- 156 | -- ** Beta reduction 157 | 158 | --_id = LamT (Named "a" ( AtmT (VarA (Named "a" (IdxV 0))))) 159 | --_const = LamT (Named "x" ( 160 | -- LamT (Named "y" ( AtmT (VarA (Named "x" (IdxV 1))))))) 161 | --_kst5 = (KstA (IntL 5)) 162 | --_kst8 = (KstA (IntL 8)) 163 | --_idx0 = VarA (Named "$0" (IdxV 0)) 164 | --_idx1 = VarA (Named "$1" (IdxV 1)) 165 | --_idx2 = VarA (Named "$2" (IdxV 2)) 166 | --app1 f x = AppT f x 167 | --app2 f x y = AppT (AppT f x) y 168 | 169 | betaReduceTerm :: Level -> Term -> Term 170 | betaReduceTerm level term = transformTerm worker level term where 171 | { worker level term = case term of { AppT f arg -> case f of { LamT nbody -> case nbody of { Named name body -> 172 | let { arg' = atomIndexToLevel level arg 173 | ; body' = transformAtoms (substitute1 level arg') (inc level) body 174 | ; final = shiftTerm (inc level) level body' } 175 | in final 176 | -- debug "=========================" Unit ( 177 | -- debug "level" level( 178 | -- debug "f/arg" (Pair (showTerm f) (showAtom arg)) ( 179 | -- debug "body" (showTerm body) ( 180 | -- debug "arg'" (showAtom arg' ) ( 181 | -- debug "body'" (showTerm body') ( 182 | -- debug "final" (showTerm final) 183 | -- final)))))) 184 | } ; _ -> term } ; _ -> term } 185 | ; substitute1 level0 what level atom = 186 | let { handleLev name j = ifte (eq j level0) what (VarA (Named name (LevV j))) } 187 | in case atom of { VarA nvar -> case nvar of { Named name var -> case var of 188 | { IdxV i -> handleLev name (minus level (inc i)) 189 | ; LevV j -> handleLev name j 190 | ; _ -> atom }} ; _ -> atom } } 191 | 192 | -------------------------------------------------------------------------------- 193 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, Balazs Komuves 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | - Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | - Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | - Neither names of the copyright holders nor the names of the contributors 15 | may be used to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 22 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | -------------------------------------------------------------------------------- /Nano.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | The compiler main module 3 | -- 4 | 5 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 6 | {-# LANGUAGE Strict #-} 7 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 8 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 9 | 10 | module Nano where 11 | 12 | -------------------------------------------------------------------------------- 13 | 14 | import Prelude ( Int , Char , Eq , Show ) 15 | import PrimGHC 16 | 17 | -------------------------------------------------------------------------------- 18 | 19 | import Base 20 | import Containers 21 | import Types 22 | import PrimOps 23 | import DataCon 24 | import Syntax 25 | import Parser 26 | import Dependency 27 | import Core 28 | import ScopeCheck 29 | import Inliner 30 | import Closure 31 | import CodeGen 32 | import Eval 33 | 34 | {-% include "Base.hs" %-} 35 | {-% include "Containers.hs" %-} 36 | {-% include "Types.hs" %-} 37 | {-% include "PrimOps.hs" %-} 38 | {-% include "DataCon.hs" %-} 39 | {-% include "Syntax.hs" %-} 40 | {-% include "Parser.hs" %-} 41 | {-% include "Dependency.hs" %-} 42 | {-% include "Core.hs" %-} 43 | {-% include "ScopeCheck.hs" %-} 44 | {-% include "Inliner.hs" %-} 45 | {-% include "Closure.hs" %-} 46 | {-% include "CodeGen.hs" %-} 47 | {-% include "Eval.hs" %-} 48 | 49 | -------------------------------------------------------------------------------- 50 | -- * Compiler entry point 51 | 52 | -- | GHC \/ nanohs shared entry point 53 | main = runIO nanoMain 54 | 55 | -- | Nano entry point 56 | nanoMain :: IO Unit 57 | nanoMain = iobind getArgs (\args -> case args of { Nil -> printUsage ; Cons cmd args' -> handleCommand cmd args' }) where 58 | { handleCommand cmd args = case cmd of { Cons dash cmd1 -> ifte (cneq dash '-') printUsage (case cmd1 of { Cons c _ -> 59 | ifte (ceq c 'i') (interpret args) ( 60 | ifte (ceq c 'c') (compile False args) ( 61 | ifte (ceq c 'o') (compile True args) printUsage)) })} 62 | ; interpret args = case args of { Cons input rest -> runInterpreter input ; _ -> printUsage } 63 | ; compile flag args = case args of { Cons input rest -> case rest of { Cons output _ -> runCompiler flag input output ; _ -> printUsage } ; _ -> printUsage }} 64 | 65 | printUsage :: IO Unit 66 | printUsage = iomapM_ putStrLn 67 | [ "usage:" 68 | , "./nanohs -i # interpret" 69 | , "./nanohs -c # compile without optimizations" 70 | , "./nanohs -o # compile with optimizations" ] 71 | 72 | runCompiler :: Bool -> FilePath -> FilePath -> IO Unit 73 | runCompiler optimize inputFn outputFn = iobind (loadModules Compile inputFn) (\prgdata -> case prgdata of { 74 | PrgData strlits dconTrie coreprg -> 75 | iosequence_ 76 | [ putStrLn (append "compiling with optimizations " (ifte optimize "enabled" "disabled")) 77 | , let { lprogram = coreProgramToLifted (ifte optimize (optimizeCorePrg coreprg) coreprg) 78 | ; code = runCodeGenM_ (liftedProgramToCode inputFn strlits dconTrie lprogram) 79 | } in writeLines outputFn code 80 | , putStrLn "done." ]}) 81 | 82 | runInterpreter :: FilePath -> IO Unit 83 | runInterpreter inputFn = iobind (loadModules Interpret inputFn) (\prgdata -> case prgdata of { 84 | PrgData strlits dconTrie coreprg -> case coreprg of { CorePrg blocks mainIdx _main -> 85 | ioseq (putStrLn "interpreting...") (let 86 | { bigterm = termLevelsToIndices 0 (programToTerm blocks) 87 | ; dconNames = mapFromList (map swap (trieToList dconTrie)) 88 | ; staticEnv = StaticEnv dconNames strlits mainIdx 89 | } in iobind (eval staticEnv NilEnv bigterm) (printValue dconNames) )}}) 90 | 91 | -------------------------------------------------------------------------------- 92 | -- ** Load and parse source files 93 | 94 | data ProgramData = PrgData (List String) DataConTable CoreProgram 95 | 96 | loadModules :: Mode -> FilePath -> IO ProgramData 97 | loadModules mode inputFn = 98 | iobind (loadAndParse1 Nil inputFn) (\pair -> case pair of { Pair files toplevs -> (let 99 | { defins0 = catMaybes (map mbDefin toplevs) 100 | ; dpair = extractStringConstants defins0 } in case dpair of { Pair strlits defins1 -> let 101 | { dconTrie = collectDataCons (map located defins1) 102 | ; program = reorderProgram defins1 103 | ; coreprg = programToCoreProgram mode dconTrie program 104 | } in ioreturn (PrgData strlits dconTrie coreprg) })}) 105 | 106 | type Files = List FilePath 107 | type Loaded = Pair Files (List TopLevel) 108 | 109 | loadAndParseMany :: Files -> List FilePath -> IO Loaded 110 | loadAndParseMany sofar fnames = case fnames of { Nil -> ioreturn (Pair sofar Nil) ; Cons this rest -> 111 | iobind (loadAndParse1 sofar this) (\loaded -> case loaded of { Pair sofar' toplevs1 -> 112 | iobind (loadAndParseMany sofar' rest) (\loaded -> case loaded of { Pair sofar'' toplevs2 -> 113 | ioreturn (Pair sofar'' (append toplevs1 toplevs2)) }) }) } 114 | 115 | loadAndParse1 :: Files -> FilePath -> IO Loaded 116 | loadAndParse1 sofar fname = case stringMember fname sofar of 117 | { True -> ioreturn (Pair sofar Nil) 118 | ; False -> ioseq (putStrLn (append "+ " fname)) (iobind (readFile fname) (\text -> let 119 | { blocks = lexer fname text 120 | ; toplevs = map (parseTopLevelBlock fname) blocks 121 | ; includes = filterIncludes toplevs 122 | ; sofar' = Cons fname sofar 123 | } in iobind (loadAndParseMany sofar' includes) (\loaded -> case loaded of { Pair sofar'' toplevs2 -> 124 | ioreturn (Pair sofar'' (append toplevs toplevs2)) }))) } 125 | 126 | -------------------------------------------------------------------------------- 127 | -------------------------------------------------------------------------------- /Parser.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Parsing 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module Parser where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | 18 | import Base 19 | import Types 20 | import Syntax 21 | 22 | {-% include "Base.hs" %-} 23 | {-% include "Types.hs" %-} 24 | {-% include "Syntax.hs" %-} 25 | 26 | -------------------------------------------------------------------------------- 27 | -- * Parsing 28 | 29 | mbVar :: Token -> Maybe Name 30 | mbLit :: Token -> Maybe Literal 31 | mbSpec :: Token -> Maybe Special 32 | 33 | mbVar tok = case tok of { VarTok v -> Just v ; _ -> Nothing } 34 | mbLit tok = case tok of { LitTok l -> Just l ; _ -> Nothing } 35 | mbSpec tok = case tok of { SpecTok s -> Just s ; _ -> Nothing } 36 | 37 | mbVarL :: Token -> Maybe Name 38 | mbVarU :: Token -> Maybe Name 39 | mbVarL tok = case mbVar tok of { Just str -> ifte (isLower_ (head str)) (Just str) Nothing } 40 | mbVarU tok = case mbVar tok of { Just str -> ifte (isUpper (head str)) (Just str) Nothing } 41 | 42 | mbStrLit :: Token -> Maybe String 43 | mbStrLit tok = case mbLit tok of { Nothing -> Nothing ; Just lit -> 44 | case lit of { StrL s -> Just s ; _ -> Nothing } } 45 | 46 | isNotWhite :: Token -> Bool 47 | isNotWhite tok = case tok of { WhiteTok -> False ; _ -> True } 48 | 49 | locatedL :: Parser tok a -> Parser tok (Located a) 50 | locatedL parser = pbind getFn (\fname -> 51 | pbind getSrcPos (\pos1 -> 52 | pbind parser (\what -> 53 | pbind getSrcPos (\pos2 -> preturn (Located (Loc fname pos1 pos2) what))))) 54 | 55 | type LList a = List (Located a) 56 | type LString = LList Char 57 | 58 | addLocations :: FilePath -> String -> LString 59 | addLocations fname text = go startSrcPos text where { go pos str = case str of { Nil -> Nil 60 | ; Cons x xs -> let { pos' = nextSrcPos x pos } in Cons (Located (Loc fname pos pos') x) (go pos' xs) } } 61 | 62 | -------------------------------------------------------------------------------- 63 | -- ** Parser combinators 64 | 65 | data ParseResult tok a 66 | = ParseOk a SrcPos (LList tok) 67 | | ParseErr SrcPos 68 | deriving Show 69 | 70 | type Parser tok a = FilePath -> SrcPos -> LList tok -> ParseResult tok a 71 | 72 | runParser' :: FilePath -> Parser tok a -> LList tok -> ParseResult tok a 73 | runParser' fname parser input = case input of { Nil -> error "empty input" 74 | ; Cons locx _ -> parser fname (locatedStart locx) input } 75 | 76 | runParser :: FilePath -> Parser tok a -> LList tok -> Either String a 77 | runParser fname p input = case runParser' fname p input of 78 | { ParseOk x pos rest -> case rest of 79 | { Nil -> Right x 80 | ; Cons _ _ -> Left (append "unexpected extra token at " (showSrcPos' fname pos)) } 81 | ; ParseErr pos -> Left (append "parse error at " (showSrcPos' fname pos)) } 82 | 83 | runParser_ :: FilePath -> Parser tok a -> LList tok -> a 84 | runParser_ fname p input = case runParser fname p input of { Right y -> y ; Left msg -> error msg } 85 | 86 | preturn :: a -> Parser tok a 87 | preturn x fn pos str = ParseOk x pos str 88 | 89 | pfail :: Parser tok a 90 | pfail fn pos str = ParseErr pos 91 | 92 | pfmap :: (a -> b) -> Parser tok a -> Parser tok b 93 | pfmap f p = \fn pos str -> case p fn pos str of 94 | { ParseOk x pos' str' -> ParseOk (f x) pos' str' 95 | ; ParseErr pos' -> ParseErr pos' } 96 | 97 | preplace :: b -> Parser tok a -> Parser tok b 98 | preplace x = pfmap (const x) 99 | 100 | pbind :: Parser tok a -> (a -> Parser tok b) -> Parser tok b 101 | pbind p u = \fn pos str -> case p fn pos str of 102 | { ParseOk x pos' str' -> u x fn pos' str' 103 | ; ParseErr pos' -> ParseErr pos' } 104 | 105 | pseq :: Parser tok a -> Parser tok b -> Parser tok b 106 | pseq p q = pbind p (\_ -> q) 107 | 108 | ppost :: Parser tok a -> Parser tok b -> Parser tok a 109 | ppost p q = pbind p (\x -> pbind q (\_ -> preturn x)) 110 | 111 | getFn :: Parser tok FilePath 112 | getFn = \fn pos str -> ParseOk fn pos str 113 | 114 | getSrcPos :: Parser tok SrcPos 115 | getSrcPos = \fn pos str -> ParseOk pos pos str 116 | 117 | alternative :: Parser tok a -> Parser tok a -> Parser tok a 118 | alternative p q = \fn pos str -> case p fn pos str of 119 | { ParseOk x pos' str' -> ParseOk x pos' str' 120 | ; ParseErr _ -> q fn pos str } 121 | 122 | choice :: List (Parser tok a) -> Parser tok a 123 | choice list = case list of { Nil -> pfail ; Cons p ps -> alternative p (choice ps) } 124 | 125 | optional :: Parser tok a -> Parser tok (Maybe a) 126 | optional p = \fn pos str -> case p fn pos str of 127 | { ParseOk x pos' str' -> ParseOk (Just x) pos' str' 128 | ; ParseErr _pos' -> ParseOk Nothing pos str } 129 | 130 | option :: a -> Parser tok a -> Parser tok a 131 | option x0 p = \fn pos str -> case p fn pos str of 132 | { ParseOk x pos' str' -> ParseOk x pos' str' 133 | ; ParseErr _pos' -> ParseOk x0 pos str } 134 | 135 | many :: Parser tok a -> Parser tok (List a) 136 | many p = pbind (optional p) (\mb -> case mb of 137 | { Nothing -> preturn Nil ; Just x -> pbind (many p) (\xs -> preturn (Cons x xs)) }) 138 | 139 | many1 :: Parser tok a -> Parser tok (List a) 140 | many1 p = pbind p (\x -> 141 | pbind (many p) (\xs -> preturn (Cons x xs))) 142 | 143 | manyTill :: Parser tok end -> Parser tok a -> Parser tok (List a) 144 | manyTill end p = go Fake where { go Fake = alternative (preplace Nil end) 145 | (pbind p (\x -> pbind (go Fake) (\xs -> preturn (Cons x xs)))) } 146 | 147 | sepEndBy :: Parser tok sep -> Parser tok end -> Parser tok a -> Parser tok (List a) 148 | sepEndBy sep end p = alternative (preplace Nil end) (sepEndBy1 sep end p) 149 | 150 | sepEndBy1 :: Parser tok sep -> Parser tok end -> Parser tok a -> Parser tok (List a) 151 | sepEndBy1 sep end p = pbind p (\x -> alternative 152 | (preplace (Cons x Nil) end) 153 | (pseq sep (pbind (sepEndBy sep end p) (\xs -> preturn (Cons x xs))))) 154 | 155 | accept :: (tok -> Maybe a) -> Parser tok a 156 | accept f fn pos toks = case toks of 157 | { Nil -> ParseErr pos 158 | ; Cons locx xs -> case locx of { Located loc x -> case f x of 159 | { Just y -> ParseOk y (locEnd loc) xs 160 | ; Nothing -> ParseErr pos } } } 161 | 162 | satisfy :: (tok -> Bool) -> Parser tok tok 163 | satisfy f fn pos toks = case toks of 164 | { Nil -> ParseErr pos 165 | ; Cons locx xs -> case locx of { Located loc x -> case f x of 166 | { True -> ParseOk x (locEnd loc) xs 167 | ; False -> ParseErr pos } } } 168 | 169 | anyToken :: Parser tok tok 170 | anyToken = satisfy (\_ -> True) 171 | 172 | token :: Eq tok => tok -> Parser tok tok 173 | token t = satisfy (geq t) 174 | 175 | tokens :: Eq tok => List tok -> Parser tok (List tok) 176 | tokens toks = case toks of { Nil -> preturn Nil ; Cons t ts -> 177 | pbind (token t) (\x -> pbind (tokens ts) (\xs -> preturn (Cons x xs))) } 178 | 179 | charToken :: Char -> Parser Char Char 180 | charToken t = satisfy (ceq t) 181 | 182 | charTokens :: String -> Parser Char String 183 | charTokens toks = case toks of { Nil -> preturn Nil ; Cons t ts -> 184 | pbind (charToken t) (\x -> pbind (charTokens ts) (\xs -> preturn (Cons x xs))) } 185 | 186 | oneOf :: Eq tok => List tok -> Parser tok tok 187 | oneOf list = satisfy (\x -> elem x list) 188 | 189 | noneOf :: Eq tok => List tok -> Parser tok tok 190 | noneOf list = satisfy (\x -> not (elem x list)) 191 | 192 | charOneOf :: List Char -> Parser Char Char 193 | charOneOf list = satisfy (\x -> charElem x list) 194 | 195 | charNoneOf :: List Char -> Parser Char Char 196 | charNoneOf list = satisfy (\x -> not (charElem x list)) 197 | 198 | eof :: Parser tok Unit 199 | eof fn pos str = case str of { Nil -> ParseOk Unit pos str ; Cons _ _ -> ParseErr pos } 200 | 201 | -------------------------------------------------------------------------------- 202 | -- * Parsing the pseudo-Haskell syntax 203 | -- ** Lexer 204 | 205 | type Lexer a = Parser Char a 206 | 207 | digitL = satisfy isDigit 208 | lowerL = satisfy isLower 209 | upperL = satisfy isUpper 210 | alphaL = satisfy isAlpha 211 | _alphaL = satisfy (\ch -> or (ceq ch '_') (isAlpha ch)) 212 | alphaNumL = satisfy isAlphaNum 213 | 214 | spaces0 :: Lexer Int 215 | spaces0 = pfmap length (many (charToken ' ')) 216 | 217 | spaces1 :: Lexer Int 218 | spaces1 = pfmap length (many1 (charToken ' ')) 219 | 220 | digitsL :: Lexer String 221 | digitsL = many1 digitL 222 | 223 | natL :: Lexer Int 224 | natL = pbind digitsL (\xs -> case readNat xs of { Just n -> preturn n ; Nothing -> pfail }) 225 | 226 | intL :: Lexer Int 227 | intL = pbind (optional (charToken '-')) 228 | (\mb -> case mb of { Nothing -> natL ; Just _ -> pfmap negate natL }) 229 | 230 | charLiteralL :: Lexer Char 231 | charLiteralL = 232 | pbind (charToken singleQuoteC) (\_ -> 233 | pbind (anyToken ) (\c -> 234 | pbind (charToken singleQuoteC) (\_ -> preturn c))) 235 | 236 | stringLiteralL :: Lexer String 237 | stringLiteralL = 238 | pbind (charToken doubleQuoteC) (\_ -> 239 | pbind (many (satisfy (cneq doubleQuoteC))) (\xs -> 240 | pbind (charToken doubleQuoteC) (\_ -> preturn xs))) 241 | 242 | identifierL :: Lexer Name 243 | identifierL = pbind _alphaL (\x -> 244 | pbind (many (alternative alphaNumL (charOneOf "_'#"))) (\xs -> 245 | preturn (Cons x xs))) 246 | 247 | literalL :: Lexer Literal 248 | literalL = choice 249 | [ pfmap IntL intL 250 | , pfmap ChrL charLiteralL 251 | , pfmap StrL stringLiteralL ] 252 | 253 | specialL :: Lexer Special 254 | specialL = choice 255 | [ preplace LParen (charToken '(' ) 256 | , preplace RParen (charToken ')' ) 257 | , preplace LBrace (charToken '{' ) 258 | , preplace RBrace (charToken '}' ) 259 | , preplace LBracket (charToken '[' ) 260 | , preplace RBracket (charToken ']' ) 261 | , preplace Arrow (charTokens "->") 262 | , preplace DArrow (charTokens "=>") 263 | , preplace HasType (charTokens "::") 264 | , preplace Comma (charToken ',' ) 265 | , preplace Semicolon (charToken ';' ) 266 | , preplace EqualSign (charToken '=' ) 267 | , preplace Pipe (charToken '|' ) 268 | , preplace Lambda (charToken backslashC) 269 | , preplace Dot (charToken '.' ) 270 | ] 271 | 272 | lexemeL :: Lexer Token 273 | lexemeL = choice 274 | [ pfmap LitTok literalL 275 | , pfmap SpecTok specialL 276 | , pfmap VarTok identifierL 277 | , preplace WhiteTok spaces1 278 | ] 279 | 280 | -- | 0x0A, or 0x0D, or 0x0D,0x0A. 281 | eol_ :: Lexer Unit 282 | eol_ = alternative linux windows where 283 | { linux = preplace Unit (charToken newlineC) 284 | ; windows = preplace Unit (pseq (charToken carriageReturnC) (optional (charToken newlineC))) } 285 | 286 | eol :: Lexer Unit 287 | eol = alternative eol_ eof 288 | 289 | eolIndent :: Lexer Int 290 | eolIndent = alternative (pseq eol_ spaces0) (preplace 0 eof) 291 | 292 | -- | with EOL 293 | commentL :: Lexer String 294 | commentL = ppost commentL' eol 295 | 296 | -- | without EOL 297 | commentL' :: Lexer String 298 | commentL' = choice [ comment1 , comment2 , comment3 ] where 299 | { comment1 = pseq (charTokens "--" ) (many (charNoneOf [newlineC,carriageReturnC])) 300 | ; comment2 = pseq (charTokens "{-#") (many (charNoneOf [newlineC,carriageReturnC])) 301 | ; comment3 = pseq (charTokens "#" ) (many (charNoneOf [newlineC,carriageReturnC])) 302 | } 303 | 304 | -- | We need to hide some stuff (for example @include@-s) from GHC 305 | nanoPragmaL :: Lexer LexBlock 306 | nanoPragmaL = 307 | pbind (charTokens "{-%") (\_ -> 308 | pbind (many1 (locatedL lexemeL)) (\ln -> 309 | pbind (charTokens "%-}") (\_ -> 310 | pbind (ppost (many (charNoneOf [newlineC,carriageReturnC])) eol) (\_ -> preturn ln)))) 311 | 312 | -- | Note: this accepts "eof"! 313 | emptyLineL :: Lexer Unit 314 | emptyLineL = pseq spaces0 eol 315 | 316 | type LexBlock = List LToken 317 | 318 | -- | Parser a line and all following indented lines 319 | blockL :: Lexer LexBlock 320 | blockL = worker1 Fake where 321 | { line = alternative comment (many1 (locatedL lexemeL)) 322 | ; comment = pseq commentL' (preturn Nil) 323 | ; worker _ = pbind eolIndent (\k -> ifte (gt k 0) (option Nil (worker1 Fake)) (preturn Nil)) 324 | ; worker1 _ = pbind line (\ls1 -> pbind (worker Fake) (\ls2 -> preturn (append ls1 ls2))) 325 | } 326 | 327 | blockOrCommentL :: Lexer (Maybe LexBlock) 328 | blockOrCommentL = choice 329 | [ preplace Nothing commentL 330 | , preplace Nothing emptyLineL 331 | , pfmap Just nanoPragmaL 332 | , pfmap Just blockL 333 | ] 334 | 335 | programL :: Lexer (List LexBlock) 336 | programL = pfmap catMaybes (manyTill eof blockOrCommentL) 337 | 338 | lexer :: FilePath -> String -> List LexBlock 339 | lexer fname input = runParser_ fname programL (addLocations fname input) 340 | 341 | -------------------------------------------------------------------------------- 342 | -- ** The parser 343 | 344 | type Parse a = Parser Token a 345 | 346 | parseTopLevelBlock :: FilePath -> LexBlock -> TopLevel 347 | parseTopLevelBlock fname tokens = runParser_ fname topLevelP (filterWhite tokens) 348 | 349 | filterWhite :: LexBlock -> LexBlock 350 | filterWhite = filter cond where { cond ltok = isNotWhite (located ltok) } 351 | 352 | keywords :: List String 353 | keywords = [ "where" , "case" , "of" , "let" , "let_" , "in" , "module" , "import" , "include" , "data" , "type" ] 354 | 355 | isKeyword :: String -> Bool 356 | isKeyword str = elem str keywords 357 | 358 | topLevelP :: Parse TopLevel 359 | topLevelP = choice 360 | [ tyAliasP 361 | , dataDeclP 362 | , importP 363 | , includeP 364 | , moduleP 365 | , tyDeclP 366 | , pfmap TopDefin ldefinP 367 | ] 368 | 369 | tyDeclP :: Parse TopLevel 370 | tyDeclP = pbind varP (\name -> 371 | pbind (specP HasType ) (\_ -> 372 | pbind (many1 anyToken) (\toks -> preturn (TopTyDecl name toks) ))) 373 | 374 | dataDeclP :: Parse TopLevel 375 | dataDeclP = pbind (keywordP "data") (\_ -> 376 | pbind (many1 anyToken ) (\toks -> preturn (TopDataDecl toks) )) 377 | 378 | tyAliasP :: Parse TopLevel 379 | tyAliasP = pbind (keywordP "type") (\_ -> 380 | pbind (many1 anyToken ) (\toks -> preturn (TopTyAlias toks) )) 381 | 382 | -- haskell import 383 | importP :: Parse TopLevel 384 | importP = pbind (keywordP "import") (\_ -> 385 | pbind (many1 anyToken ) (\toks -> preturn (TopImport toks) )) 386 | 387 | -- our C-style include 388 | includeP :: Parse TopLevel 389 | includeP = pbind (keywordP "include") (\_ -> 390 | pbind (accept mbStrLit ) (\fname -> preturn (TopInclude fname) )) 391 | 392 | moduleP :: Parse TopLevel 393 | moduleP = pbind (keywordP "module") (\_ -> 394 | pbind (many1 anyToken ) (\toks -> preturn (TopModule toks) )) 395 | 396 | definP :: Parse DefinE 397 | definP f p t = 398 | pbind varP (\name -> 399 | pbind (many varP) (\args -> 400 | pbind (specP EqualSign) (\_ -> 401 | pbind exprP (\body -> preturn (Defin name (lamsE args body)) )))) f p t 402 | 403 | ldefinP :: Parse LDefinE 404 | ldefinP f p t = locatedL definP f p t 405 | 406 | exprP :: Parse Expr 407 | exprP f p t = 408 | pbind nakedExprP (\expr -> 409 | pbind (optional whereBlockP) (\mb -> 410 | preturn (case mb of { Nothing -> expr ; Just defs -> RecE defs expr } ))) f p t 411 | 412 | whereBlockP :: Parse (List LDefinE) 413 | whereBlockP f p t = pbind (keywordP "where") (\_ -> blockP) f p t 414 | 415 | -- | Here \"naked\" means without a where block 416 | nakedExprP :: Parse Expr 417 | nakedExprP f p t = choice 418 | [ lamExprP 419 | , pfmap listAppsE (many1 atomP) 420 | ] f p t 421 | 422 | -- | We need an explicit eta-expansion here so that it doesn't loop in GHCi 423 | -- (and probably also itself) 424 | atomP :: Parse Expr 425 | atomP f p t = choice 426 | [ inParensP exprP 427 | , pfmap LitE literalP 428 | , pfmap ListE listExprP 429 | , caseExprP 430 | , letsExprP , letRecExprP 431 | , pfmap VarE (locatedL varP) 432 | ] f p t 433 | 434 | specP :: Special -> Parse Special 435 | specP spec = preplace spec (token (SpecTok spec)) 436 | 437 | literalP :: Parse Literal 438 | literalP = accept mbLit 439 | 440 | -- capitalIdentP :: Parse Name 441 | -- capitalIdentP = accept mbVarU 442 | 443 | identP :: Parse Name 444 | identP = accept mbVar 445 | 446 | -- | This does not accepts they keywords, but accepts constructors 447 | varP :: Parse Name 448 | varP = pbind identP (\v -> ifte (isKeyword v) pfail (preturn v)) 449 | 450 | -- | This only accept uppercase identifiers 451 | conP :: Parse Name 452 | conP = accept mbVarU 453 | 454 | keywordP :: String -> Parse String 455 | keywordP word = pbind identP (\v -> ifte (stringEq v word) (preturn word) pfail) 456 | 457 | inParensP :: Parse a -> Parse a 458 | inParensP p = pbind (specP LParen) (\_ -> 459 | pbind p (\x -> 460 | pbind (specP RParen) (\_ -> preturn x))) 461 | 462 | listExprP :: Parse (List Expr ) 463 | tupleExprP :: Parse (List Expr ) 464 | blockP :: Parse (List LDefinE) 465 | branchesP :: Parse (List BranchE) 466 | 467 | listExprP f p t = pbind (specP LBracket) (\_ -> sepEndBy (specP Comma ) (specP RBracket) exprP ) f p t 468 | tupleExprP f p t = pbind (specP LParen ) (\_ -> sepEndBy (specP Comma ) (specP RParen ) exprP ) f p t 469 | blockP f p t = pbind (specP LBrace ) (\_ -> sepEndBy (specP Semicolon) (specP RBrace ) ldefinP) f p t 470 | branchesP f p t = pbind (specP LBrace ) (\_ -> sepEndBy (specP Semicolon) (specP RBrace ) branchP) f p t 471 | 472 | patternP :: Parse Pattern 473 | patternP f p t = naked f p t where 474 | { naked f p t = choice [ wild , var , inParensP patternP , apps ] f p t 475 | ; safe f p t = choice [ wild , var , inParensP patternP , con ] f p t 476 | ; wild = pbind (keywordP "_" ) (\_ -> preturn WildP ) 477 | ; var = pbind (accept mbVarL) (\v -> preturn (VarP v)) 478 | ; con = pbind conP (\con -> preturn (AppP con Nil)) 479 | ; apps = pbind conP (\con -> 480 | pbind (many safe ) (\args -> preturn (AppP con args))) 481 | } 482 | 483 | branchP :: Parse BranchE 484 | branchP f p t = alternative defaultBranchP branchP' f p t 485 | 486 | branchP' :: Parse BranchE 487 | branchP' f p t = 488 | pbind conP (\con -> 489 | pbind (many varP ) (\args -> 490 | pbind (specP Arrow) (\_ -> 491 | pbind (exprP ) (\body -> preturn (BranchE con args body))))) f p t 492 | 493 | defaultBranchP :: Parse BranchE 494 | defaultBranchP f p t 495 | = pbind (keywordP "_") (\_ -> 496 | pbind (specP Arrow ) (\_ -> 497 | pbind (exprP ) (\body -> preturn (DefaultE body)))) f p t 498 | 499 | lamExprP :: Parse Expr 500 | lamExprP f p t = 501 | pbind (specP Lambda) (\_ -> 502 | pbind (many1 varP ) (\args -> 503 | pbind (specP Arrow ) (\_ -> 504 | pbind exprP (\body -> preturn (lamsE args body))))) f p t 505 | 506 | letExprP' :: (List LDefinE -> Expr -> Expr) -> String -> Parse Expr 507 | letExprP' con letkw = pbind (keywordP letkw) (\_ -> 508 | pbind (blockP ) (\defs -> 509 | pbind (keywordP "in" ) (\_ -> 510 | pbind (exprP ) (\expr -> preturn (con defs expr))))) 511 | 512 | -- | Non-recursive let 513 | letsExprP :: Parse Expr 514 | letsExprP f p t = letExprP' LetE "let_" f p t 515 | 516 | -- | Recursive let 517 | letRecExprP :: Parse Expr 518 | letRecExprP f p t = letExprP' RecE "let" f p t 519 | 520 | caseExprP :: Parse Expr 521 | caseExprP f p t = 522 | pbind (keywordP "case") (\_ -> 523 | pbind (locatedL exprP ) (\lexpr -> 524 | pbind (keywordP "of" ) (\_ -> 525 | pbind (branchesP ) (\brs -> preturn (CaseE lexpr brs))))) f p t 526 | 527 | -------------------------------------------------------------------------------- 528 | -------------------------------------------------------------------------------- /PrimGHC.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Primops implemented in Haskell, so the compiler can be hosted by GHC too 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict, StrictData #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances, TypeFamilies #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module PrimGHC where 10 | 11 | import qualified Prelude 12 | import qualified Data.Char 13 | import qualified Data.Bits as Bits 14 | import qualified System.IO as IO 15 | import qualified System.IO.Unsafe as Unsafe 16 | import qualified System.Environment as Env 17 | import qualified System.Exit as Exit 18 | import qualified Control.Exception as Exc 19 | 20 | import Prelude ( Int , Char , Eq , Show ) 21 | import Data.String ( IsString(..) ) 22 | import GHC.Exts ( IsList (..) ) 23 | 24 | -------------------------------------------------------------------------------- 25 | 26 | import qualified Debug.Trace 27 | 28 | debug :: Show a => List Char -> a -> b -> b 29 | debug x y z = Debug.Trace.trace msg z where 30 | msg = Prelude.concat parts 31 | parts :: [Prelude.String] 32 | parts = [ ">>> " , _toGhcString x , " => " , Prelude.show y ] 33 | 34 | debug_ :: Show b => List Char -> b -> b 35 | debug_ x y = debug x y y 36 | 37 | -------------------------------------------------------------------------------- 38 | -- * Primitive types used by the primops 39 | 40 | data Unit = Unit deriving Show 41 | 42 | data Bool = False | True deriving Show 43 | 44 | data List a = Nil | Cons a (List a) deriving (Eq) 45 | 46 | type String = List Char 47 | 48 | data Maybe a = Nothing | Just a deriving Show 49 | 50 | type Handle = IO.Handle 51 | 52 | data Pair a b = Pair a b deriving Show 53 | 54 | -------------------------------------------------------------------------------- 55 | -- * IO support 56 | 57 | type IO a = IO.IO a 58 | 59 | ioreturn :: a -> IO a 60 | ioreturn = Prelude.return 61 | 62 | iobind :: IO a -> (a -> IO b) -> IO b 63 | iobind = (Prelude.>>=) 64 | 65 | {-# NOINLINE runIO #-} 66 | runIO :: IO a -> IO.IO a 67 | runIO action = do 68 | Prelude.putStrLn "[rts version = GHC]" 69 | action 70 | 71 | -------------------------------------------------------------------------------- 72 | -- * Built-ins \/ primops 73 | 74 | negate :: Int -> Int 75 | negate = Prelude.negate 76 | 77 | plus :: Int -> Int -> Int 78 | plus = (Prelude.+) 79 | 80 | minus :: Int -> Int -> Int 81 | minus = (Prelude.-) 82 | 83 | times :: Int -> Int -> Int 84 | times = (Prelude.*) 85 | 86 | div :: Int -> Int -> Int 87 | div = Prelude.div 88 | 89 | mod :: Int -> Int -> Int 90 | mod = Prelude.mod 91 | 92 | bitAnd :: Int -> Int -> Int 93 | bitAnd = (Bits..&.) 94 | 95 | bitOr :: Int -> Int -> Int 96 | bitOr = (Bits..|.) 97 | 98 | bitXor :: Int -> Int -> Int 99 | bitXor = Bits.xor 100 | 101 | shiftL :: Int -> Int -> Int 102 | shiftL = Bits.shiftL 103 | 104 | shiftR :: Int -> Int -> Int 105 | shiftR = Bits.shiftR 106 | 107 | chr :: Int -> Char 108 | chr = Data.Char.chr 109 | 110 | ord :: Char -> Int 111 | ord = Data.Char.ord 112 | 113 | -- | If-then-else *must* be lazy. Hence for now it is a primop 114 | ifte :: Bool -> a -> a -> a 115 | ifte b ~x ~y = case b of { True -> x ; False -> y } 116 | 117 | -- | It is useful if @and@ \/ @or@ shortcircuits, hence they are primops 118 | and :: Bool -> Bool -> Bool 119 | and x ~y = case x of { True -> y ; False -> False } 120 | 121 | or :: Bool -> Bool -> Bool 122 | or x ~y = case x of { True -> True ; False -> y } 123 | 124 | -- | @not@ could be user-defined, but @and@, @or@ are already primops, and it's faster this way 125 | not :: Bool -> Bool 126 | not x = case x of { True -> False ; False -> True } 127 | 128 | geq :: Eq a => a -> a -> Bool 129 | geq x y = _fromGhcBool ((Prelude.==) x y) 130 | 131 | eq :: Int -> Int -> Bool 132 | eq x y = _fromGhcBool ((Prelude.==) x y) 133 | 134 | lt :: Int -> Int -> Bool 135 | lt x y = _fromGhcBool ((Prelude.<) x y) 136 | 137 | le :: Int -> Int -> Bool 138 | le x y = _fromGhcBool ((Prelude.<=) x y) 139 | 140 | -------------------------------------------------------------------------------- 141 | 142 | {-# NOINLINE error #-} 143 | error :: String -> a 144 | error msg = Prelude.error (_toGhcString msg) 145 | 146 | print :: Show a => a -> IO Unit 147 | print what = (Prelude.>>) (Prelude.print what) (Prelude.return Unit) 148 | 149 | -------------------------------------------------------------------------------- 150 | -- * IO monad 151 | 152 | getChar :: IO (Maybe Char) 153 | getChar = Exc.catch just handler where 154 | just :: IO.IO (Maybe Char) 155 | just = (Prelude.>>=) Prelude.getChar (\c -> Prelude.return (Just c)) 156 | handler :: Exc.IOException -> IO.IO (Maybe Char) 157 | handler _ = Prelude.return Nothing 158 | 159 | putChar :: Char -> IO Unit 160 | putChar c = (Prelude.>>) (Prelude.putChar c) (Prelude.return Unit) 161 | 162 | exit :: Int -> IO Unit 163 | exit 0 = Exit.exitWith Exit.ExitSuccess 164 | exit k = Exit.exitWith (Exit.ExitFailure k) 165 | 166 | getArg :: Int -> IO (Maybe String) 167 | getArg m = Prelude.fmap (index m) (Env.getArgs) where 168 | index _ [] = Nothing 169 | index k (this:rest) = case k of 170 | 0 -> Just (_fromGhcString this) 171 | _ -> index ((Prelude.-) k 1) rest 172 | 173 | ---------------------------------------- 174 | 175 | stdin :: Handle 176 | stdin = IO.stdin 177 | 178 | stdout :: Handle 179 | stdout = IO.stdout 180 | 181 | stderr :: Handle 182 | stderr = IO.stderr 183 | 184 | data IOMode 185 | = ReadMode 186 | | WriteMode 187 | | AppendMode 188 | | ReadWriteMode 189 | deriving Show 190 | 191 | _toGhcIOMode :: IOMode -> IO.IOMode 192 | _toGhcIOMode mode = case mode of 193 | ReadMode -> IO.ReadMode 194 | WriteMode -> IO.WriteMode 195 | AppendMode -> IO.AppendMode 196 | ReadWriteMode -> IO.ReadWriteMode 197 | 198 | openFile :: String -> IOMode -> IO Handle 199 | openFile fn mode = IO.openFile (_toGhcString fn) (_toGhcIOMode mode) 200 | 201 | hClose :: Handle -> IO Unit 202 | hClose h = (Prelude.>>) (IO.hClose h) (Prelude.return Unit) 203 | 204 | hGetChar :: Handle -> IO (Maybe Char) 205 | hGetChar h = Exc.catch just handler where 206 | just :: IO.IO (Maybe Char) 207 | just = (Prelude.>>=) (IO.hGetChar h) (\c -> Prelude.return (Just c)) 208 | handler :: Exc.IOException -> IO.IO (Maybe Char) 209 | handler _ = Prelude.return Nothing 210 | 211 | hPutChar :: Handle -> Char -> IO Unit 212 | hPutChar h c = (Prelude.>>) (IO.hPutChar h c) (Prelude.return Unit) 213 | 214 | hPutStr :: Handle -> String -> IO Unit 215 | hPutStr h s = (Prelude.>>) (IO.hPutStr h (_toGhcString s)) (Prelude.return Unit) 216 | 217 | -------------------------------------------------------------------------------- 218 | -- * Marshalling to\/from standard Haskell types 219 | 220 | _fromGhcBool :: Prelude.Bool -> Bool 221 | _fromGhcBool b = case b of { Prelude.True -> True ; Prelude.False -> False } 222 | 223 | _fromGhcList :: [a] -> List a 224 | _fromGhcList = go where { go [] = Nil ; go (x:xs) = Cons x (go xs) } 225 | 226 | _toGhcList :: List a -> [a] 227 | _toGhcList = go where { go Nil = [] ; go (Cons x xs) = x : (go xs) } 228 | 229 | _fromGhcString :: Prelude.String -> String 230 | _fromGhcString = _fromGhcList 231 | 232 | _toGhcString :: String -> Prelude.String 233 | _toGhcString = _toGhcList 234 | 235 | show :: Show a => a -> String 236 | show x = _fromGhcString (Prelude.show x) 237 | 238 | instance Show a => Show (List a) where show list = Prelude.show (_toGhcList list) 239 | 240 | instance IsString (List Char) where fromString = _fromGhcString 241 | 242 | instance IsList (List a) where 243 | type (Item (List a)) = a 244 | fromList = _fromGhcList 245 | toList = _toGhcList 246 | 247 | -------------------------------------------------------------------------------- 248 | -------------------------------------------------------------------------------- /PrimOps.hs: -------------------------------------------------------------------------------- 1 | 2 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 3 | {-# LANGUAGE Strict #-} 4 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 5 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 6 | 7 | module PrimOps where 8 | 9 | -------------------------------------------------------------------------------- 10 | 11 | import Prelude ( Int , Char , Eq , Show ) 12 | import PrimGHC 13 | 14 | -------------------------------------------------------------------------------- 15 | 16 | import Base 17 | import Containers 18 | import Types 19 | 20 | {-% include "Base.hs" %-} 21 | {-% include "Containers.hs" %-} 22 | {-% include "Types.hs" %-} 23 | 24 | -------------------------------------------------------------------------------- 25 | -- ** Primops 26 | 27 | data Prim 28 | = Negate | Plus | Minus | Times | Div | Mod | Chr | Ord 29 | | BitAnd | BitOr | BitXor | ShiftL | ShiftR 30 | | IFTE | Not | And | Or | GenEQ | IntEQ | IntLT | IntLE 31 | | GetChar | PutChar | GetArg | Exit | Error | Print | RunIO | IOBind | IOReturn 32 | | OpenFile | HClose | HGetChar | HPutChar | HPutStr | StdIn | StdOut | StdErr 33 | deriving (Eq,Show) 34 | 35 | isLazyPrim :: Prim -> Bool 36 | isLazyPrim prim = case prim of 37 | { IFTE -> True 38 | ; And -> True 39 | ; Or -> True 40 | ; _ -> False } 41 | 42 | showPrim :: Prim -> String 43 | showPrim prim = case prim of 44 | { Negate -> "Negate" ; Plus -> "Plus" ; Minus -> "Minus" 45 | ; Times -> "Times" ; Div -> "Div" ; Mod -> "Mod" 46 | ; BitAnd -> "BitAnd" ; BitOr -> "BitOr" ; BitXor -> "BitXor" 47 | ; ShiftL -> "ShiftL" ; ShiftR -> "ShiftR" 48 | ; Chr -> "Chr" ; Ord -> "Ord" ; IFTE -> "IFTE" 49 | ; Not -> "Not" ; And -> "And" ; Or -> "Or" 50 | ; IntEQ -> "IntEQ" ; IntLT -> "IntLT" ; IntLE -> "IntLE" 51 | ; GenEQ -> "GenEQ" ; Error -> "Error" ; Exit -> "Exit" 52 | ; GetChar -> "GetChar" ; PutChar -> "PutChar" ; GetArg -> "GetArg" 53 | ; StdIn -> "StdIn" ; StdOut -> "StdOut" ; StdErr -> "StdErr" 54 | ; HGetChar -> "HGetChar" ; HPutChar -> "HPutChar" ; HClose -> "HClose" 55 | ; OpenFile -> "OpenFile" ; HPutStr -> "HPutStr" ; Print -> "Print" 56 | ; IOBind -> "IOBind" ; IOReturn -> "IOReturn" ; RunIO -> "RunIO" } 57 | 58 | data PrimOp = PrimOp Arity Prim deriving Show 59 | 60 | thePrimOps :: Mode -> Trie PrimOp 61 | thePrimOps mode = let { io m = case mode of { Compile -> inc m ; Interpret -> id m } } in trieFromList 62 | [ Pair "error" (PrimOp 1 Error ) 63 | , Pair "negate" (PrimOp 1 Negate ) 64 | , Pair "plus" (PrimOp 2 Plus ) 65 | , Pair "minus" (PrimOp 2 Minus ) 66 | , Pair "times" (PrimOp 2 Times ) 67 | , Pair "div" (PrimOp 2 Div ) 68 | , Pair "mod" (PrimOp 2 Mod ) 69 | , Pair "bitAnd" (PrimOp 2 BitAnd ) 70 | , Pair "bitOr" (PrimOp 2 BitOr ) 71 | , Pair "bitXor" (PrimOp 2 BitXor ) 72 | , Pair "shiftL" (PrimOp 2 ShiftL ) 73 | , Pair "shiftR" (PrimOp 2 ShiftR ) 74 | , Pair "chr" (PrimOp 1 Chr ) 75 | , Pair "ord" (PrimOp 1 Ord ) 76 | , Pair "ifte" (PrimOp 3 IFTE ) 77 | , Pair "not" (PrimOp 1 Not ) 78 | , Pair "and" (PrimOp 2 And ) 79 | , Pair "or" (PrimOp 2 Or ) 80 | , Pair "geq" (PrimOp 2 GenEQ ) 81 | , Pair "eq" (PrimOp 2 IntEQ ) 82 | , Pair "lt" (PrimOp 2 IntLT ) 83 | , Pair "le" (PrimOp 2 IntLE ) 84 | , Pair "getChar" (PrimOp (io 0) GetChar ) 85 | , Pair "putChar" (PrimOp (io 1) PutChar ) 86 | , Pair "getArg" (PrimOp (io 1) GetArg ) 87 | , Pair "exit" (PrimOp (io 1) Exit ) 88 | , Pair "openFile" (PrimOp (io 2) OpenFile) 89 | , Pair "hClose" (PrimOp (io 1) HClose ) 90 | , Pair "hGetChar" (PrimOp (io 1) HGetChar) 91 | , Pair "hPutChar" (PrimOp (io 2) HPutChar) 92 | , Pair "hPutStr" (PrimOp (io 2) HPutStr ) 93 | , Pair "print" (PrimOp (io 1) Print ) 94 | , Pair "iobind" (PrimOp (io 2) IOBind ) 95 | , Pair "ioreturn" (PrimOp (io 1) IOReturn) 96 | , Pair "stdin" (PrimOp 0 StdIn ) 97 | , Pair "stdout" (PrimOp 0 StdOut ) 98 | , Pair "stderr" (PrimOp 0 StdErr ) 99 | , Pair "runIO" (PrimOp 1 RunIO ) 100 | ] 101 | 102 | -------------------------------------------------------------------------------- 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | NanoHaskell: a self-hosting lambda calculus compiler 3 | ==================================================== 4 | 5 | The goal of this experiment is to create a self-hosting lambda calculus 6 | compiler (and interpreter) in a minimal amount of Haskell-style code. 7 | 8 | The language is (strict) lambda calculus + data constructors + simple 9 | pattern matching + recursive lets + IO effects. The syntax is chosen so that 10 | a program can be also a valid Haskell program at the same time (this makes 11 | development much easier). 12 | 13 | Haskell features like type signatures, data type declarations and imports 14 | are parsed (well, recognized...), but then ignored. 15 | 16 | 17 | Current status 18 | -------------- 19 | 20 | * it compiles via GHC, both with and without optimizations 21 | * it self-hosts, both with and without optimizations 22 | * it needs a large C stack (32+ Mb) + GCC optims (because of the lack of tail call elimination) 23 | * source code: about 2000 "essential" lines + 560 lines of type annotations; the C runtime is \~650 lines 24 | (including some debugging features) 25 | * the interpreter is not working 100% correctly at the moment 26 | 27 | 28 | Usage 29 | ----- 30 | 31 | $ nanohs -c input.hs output.c # compile with optimizations disabled 32 | $ nanhos -o input.hs output.c # compile with optimizations enabled 33 | $ nanhos -i input.hs [arg1 [arg2 ...]] # interpret 34 | 35 | Or you can just use `runghc`: 36 | 37 | $ runghc Nano.hs -c examples/church.nano tmp.c ; gcc tmp.c ; ./a.out 38 | 39 | 40 | ### Imports 41 | 42 | Haskell imports are ignored, but you can use C-style includes with the pragma: 43 | 44 | {-% include "othermodule.hs" %-} 45 | 46 | 47 | The surface language 48 | -------------------- 49 | 50 | The idea is to use a subset of Haskell syntax, so that the same 51 | program can also be compiled / interpreted by GHC. 52 | 53 | * no static type system (untyped lambda calculus) - but maybe there should be a type checker after all? 54 | * no data type declarations (constructors are arbitrary capitalized names) 55 | * no module system - instead, C-style includes 56 | * strict language (if-then-else must be lazy though; `and` / `or` shortcuts too) 57 | * ML-style side effects (but only used for IO, which is then wrapped into a monad) 58 | * only simple pattern matching + default branch (TODO: nested patterns) 59 | * no infix operators 60 | * list construction syntax `[a,b,c]` is supported 61 | * no indentation syntax (only curly braces), except for top-level blocks 62 | * only line comments, starting at the beginning of the line 63 | * built-in data types: `Int`, `Char`, `Bool`, `List`, `Maybe`, etc - those required by the primops 64 | * universal polymorphic equality comparison primop (?) 65 | * no escaping in character / string constants (TODO: maybe it's worth to do escaping?) 66 | * basic IO: standard input / output, basic file handling, early exit, command line arguments 67 | 68 | We can make the same source files to be accepted by both GHC and 69 | itself by recognizing and ignoring GHC-specific lines (pragmas, imports, 70 | type signatures, datatype declarations, type synonyms). We just put 71 | primops implementations into a PrimGHC module (as imports are ignored). 72 | 73 | We could in theory have several backends: 74 | 75 | * C99 on 64 bit architectures 76 | * TODO: x86-64 assembly 77 | * TODO: some simple bytecode virtual machine 78 | 79 | For bootstrapping philosophy it seems to be useful to have a very simple virtual 80 | machine, for which an interpreter can be very easily written in C or any other 81 | language. 82 | 83 | 84 | Compilation pipeline 85 | -------------------- 86 | 87 | 1. lexer 88 | 2. parser 89 | 3. partition recursive lets using dependency analysis 90 | 4. recognize primops 91 | 5. TODO: eliminate pattern matching into simple branching on constructors 92 | 6. collect data constructors 93 | 7. scope checking & conversion to core language 94 | 8. inline small functions + beta reduce + eliminate unused lets 95 | 9. closure conversion 96 | 10. TODO: compile to some low-level intermediate language 97 | 11. final code generation (TODO: different backends) 98 | 99 | 100 | Runtime system 101 | -------------- 102 | 103 | There is an "enviroment" stack separate from the CPU stack. This makes it 104 | very easy to find GC roots: just walk through the stack. The stack contains 105 | pointers to the heap (with the optimization that small heap objects, fitting 106 | into 61 bits, are not actually allocated). 107 | 108 | On the heap there are only two kind of objects, closures and data constructors: 109 | 110 | * data constructor (needs: tag + arity) 111 | * closure / partial application (needs: static function pointer / index, 112 | number of applied arguments, number of remaining arguments) 113 | 114 | Heap pointers are also tagged: 115 | 116 | * normal heap pointer (closure or data constructor) 117 | * 61 bit literals (int / char) 118 | * nullary constructors 119 | * static function pointers 120 | * foreign pointers (used for file handles in the C runtime) 121 | 122 | There are no thunks on the heap because we are strict. 123 | 124 | The garbage collector is a very simple copying (compacting) GC. 125 | 126 | 127 | Implementation details 128 | ---------------------- 129 | 130 | There are some minor tricks you should be aware of if you try to read the code. 131 | 132 | ### Argument order 133 | 134 | The order of function arguments on the stack, the captured variables in closures 135 | and also the order of constructor arguments on heap are all reversed compared to 136 | the "logical" (source code) order. This makes the implementation of application 137 | much simpler. 138 | 139 | [ Cons_tag argN ... arg2 arg1 ] # data constructor heap object 140 | [ Closure_tag envK ... env2 env1 ] # closure heap object 141 | [ ... | argN ... arg1 envK ... env1 | undefined ] # stack when calling a static function 142 | ^ BP ^ SP 143 | 144 | Note: our stack grows "upwards" (unlike the CPU stack which grows "downwards"). 145 | 146 | ### IO monad 147 | 148 | There is an IO monad, which in the GHC runtime and the interpreted runtime is 149 | the host's IO monad, while in the compiled code it is encoded with functions 150 | having side effects: 151 | 152 | type IO a = ActionToken -> a 153 | 154 | You need to begin your `main` function with an explicit `runIO` call (this is 155 | useful while debugging, as main can be just a simple expression instead). 156 | 157 | 158 | Organization of source code 159 | --------------------------- 160 | 161 | Base.hs - base library / prelude 162 | Closure.hs - closure conversion 163 | CodeGen.hs - code generation 164 | Containers.hs - container data structures 165 | Core.hs - core language 166 | DataCon.hs - data constructors 167 | Dependency.hs - reordering lets using the dependency graph 168 | Eval.hs - interpreter 169 | Inliner.hs - inliner + basic optimizations 170 | Nano.hs - main executable 171 | Parser.hs - parser 172 | PrimGHC.hs - the primops implemented in Haskell (so that GHC can host it) 173 | PrimOps.hs - primops 174 | ScopeCheck.hs - scope checking + conversion to core 175 | Syntax.hs - surface syntax 176 | Types.hs - common types 177 | rts.c - the runtime system implemented in C 178 | bootstrap.sh - shell script to bootstrap the compiler 179 | sloc_count.sh - shell script to measure source code size 180 | 181 | -------------------------------------------------------------------------------- /ScopeCheck.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Scope checking and conversion to core 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module ScopeCheck where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | 18 | import Base 19 | import Containers 20 | import Types 21 | import PrimOps 22 | import DataCon 23 | import Syntax 24 | import Core 25 | 26 | {-% include "Base.hs" %-} 27 | {-% include "Containers.hs" %-} 28 | {-% include "Types.hs" %-} 29 | {-% include "PrimOps.hs" %-} 30 | {-% include "DataCon.hs" %-} 31 | {-% include "Syntax.hs" %-} 32 | {-% include "Core.hs" %-} 33 | 34 | -------------------------------------------------------------------------------- 35 | -- * Config 36 | 37 | -- | Name of the entry point 38 | nanoMainIs :: Name 39 | nanoMainIs = "main" 40 | 41 | -------------------------------------------------------------------------------- 42 | -- * conversion to core 43 | 44 | exprToCore :: Trie PrimOp -> DataConTable -> Scope -> Expr -> Term 45 | exprToCore primops dconTable iniScope expr = scopeCheck dconTable iniScope (recogPrimApps1 primops expr) 46 | 47 | programToCoreProgram :: Mode -> DataConTable -> Program Expr -> CoreProgram 48 | programToCoreProgram mode dconTable blocks = CorePrg (map worker blocks) mainIdx mainTerm where 49 | { primops = thePrimOps mode 50 | ; duplicate n = concat [ "multiple declaration of " , quoteString n ] 51 | ; defins_ = map located (forgetBlockStructure blocks) 52 | ; topLevScope = trieFromListUnique duplicate (zipWithIndex (\n i -> Pair n (TopL i)) (map definedName defins_)) 53 | ; worker block = case block of 54 | { NonRecursive defin -> NonRecursive ( fmapLDefin (exprToCore primops dconTable topLevScope) defin ) 55 | ; Recursive defins -> Recursive (map (fmapLDefin (exprToCore primops dconTable topLevScope)) defins) } 56 | ; no_main_err = \_ -> error (concat [ "entry point " , quoteString nanoMainIs , " not found" ]) 57 | ; mainIdx = case trieLookup nanoMainIs topLevScope of { Just varl -> case varl of 58 | { TopL k -> k ; _ -> no_main_err Unit } ; _ -> no_main_err Unit } 59 | ; mainTerm = AtmT (VarA (Named nanoMainIs (TopV mainIdx))) } 60 | 61 | -------------------------------------------------------------------------------- 62 | -- ** Scope checking 63 | 64 | data VarL 65 | = LevL Level 66 | | TopL Int 67 | deriving Show 68 | 69 | type Scope = Trie VarL 70 | 71 | lookupVar :: Level -> Scope -> Location -> String -> Var 72 | lookupVar level scope loc name = 73 | case trieLookup name scope of 74 | { Just v -> case v of { LevL k -> IdxV (dec (minus level k)) ; TopL j -> TopV j } 75 | ; Nothing -> error (concat [ "variable name " , quoteString name , " not in scope, at " , showLocation loc ])} 76 | 77 | lookupCon :: DataConTable -> String -> Con 78 | lookupCon dcontable name = 79 | case trieLookup name dcontable of { Just con -> con ; Nothing -> 80 | error (concat [ "fatal error: constructor " , quoteString name , " not found" ])} 81 | 82 | scopeCheck :: DataConTable -> Scope -> Expr -> Term 83 | scopeCheck dcontable = go 0 where 84 | { mbAtom level scope expr = case expr of 85 | { VarE lname -> case lname of { Located loc name -> case isDCon name of 86 | { True -> Just (ConA (Named name (lookupCon dcontable name))) 87 | ; False -> Just (VarA (Named name (lookupVar level scope loc name))) }} 88 | ; LitE lit -> case lit of 89 | { StrL cs -> Nothing 90 | ; _ -> Just (KstA lit) } 91 | ; _ -> Nothing } 92 | ; go level scope expr = case expr of 93 | { VarE name -> case mbAtom level scope expr of 94 | { Just atom -> AtmT atom 95 | ; Nothing -> error "fatal error: VarE should be always an atom!" } 96 | ; StrE j -> AtmT (VarA (Named (appendInt "str_" j) (StrV j))) 97 | ; AppE e1 e2 -> case mbAtom level scope e2 of 98 | { Just atom -> AppT (go level scope e1) atom 99 | ; Nothing -> LetT (Named "letx" (go level scope e2)) (AppT (go (inc level) scope e1) (VarA (Named "letx" (IdxV 0)))) } 100 | ; LamE name body -> LamT (Named name (go (inc level) (trieInsert name (LevL level) scope) body)) 101 | ; LetE defs body -> case defs of { Nil -> go level scope body ; Cons defin rest -> case located defin of 102 | { Defin name rhs -> let { tm = go level scope rhs ; scope' = trieInsert name (LevL level) scope } 103 | in LetT (Named name tm) (go (inc level) scope' (LetE rest body)) } } 104 | ; RecE defs body -> let { n = length defs ; level' = plus level n 105 | ; f scp nameidx = case nameidx of { Pair name j -> trieInsert name (LevL j) scp } 106 | ; scope' = foldl f scope (zip (map ldefinedName defs) (rangeFrom level n)) 107 | } in RecT n (map (goDefin level' scope') defs) (go level' scope' body) 108 | ; CaseE lwhat brs -> case lwhat of { Located loc what -> case mbAtom level scope what of 109 | { Just atom -> goCase level scope loc atom brs 110 | ; Nothing -> LetT (Named "scrut" (go level scope what)) (goCase (inc level) scope loc (VarA (Named "scrut" (IdxV 0))) brs) }} 111 | ; LitE lit -> case lit of 112 | { StrL cs -> go level scope (ListE (map (\x -> LitE (ChrL x)) cs)) 113 | ; _ -> AtmT (KstA lit) } 114 | ; ListE exprs -> case exprs of 115 | { Nil -> AtmT (ConA (Named "Nil" con_Nil)) 116 | ; Cons e es -> go level scope (AppE (AppE (VarE (fakeLocated "Cons")) e) (ListE es)) } 117 | ; PrimE prim args -> case prim of { PrimOp _arity pri -> case isLazyPrim pri of 118 | { False -> goPrim prim 0 level scope Nil args 119 | -- { False -> goArgs level scope args (PriT prim 120 | -- (zipWithIndex (\i j -> VarA (Named (appendInt "name" j) (IdxV i))) (reverse (range (length args))) )) 121 | ; True -> LZPT prim (map (go level scope) args) }} 122 | } 123 | -- ; finishPrim :: PrimOp -> List (Either Term Atom) -> Int -> Term 124 | ; finishPrim prim theEis ofs = let 125 | { theVars = zipWithIndex (\i j -> VarA (Named (appendInt "parg" j) (IdxV i))) (reverse (range ofs)) 126 | ; nameIt var x = case var of { VarA named -> case named of { Named n _ -> Named n x }} 127 | ; worker eis vars atoms = case eis of { Nil -> PriT prim (reverse atoms) ; Cons ei eis' -> case ei of 128 | { Right atom -> worker eis' vars (Cons (shiftAtomRight ofs atom) atoms) 129 | ; Left term -> case vars of { Cons var vars' -> LetT (nameIt var term) (worker eis' vars' (Cons var atoms)) }}} 130 | } in worker theEis theVars Nil 131 | -- ; goPrim :: PrimOp -> Int -> Level -> Scope -> List (Either Term Atom) -> List Expr -> Term 132 | ; goPrim prim ofs level scope newargs oldargs = case oldargs of 133 | { Nil -> finishPrim prim (reverse newargs) ofs 134 | ; Cons this rest -> case mbAtom (minus level ofs) scope this of 135 | { Just atom -> goPrim prim ofs level scope (Cons (Right atom ) newargs) rest 136 | ; Nothing -> goPrim prim (inc ofs) (inc level) scope (Cons (Left (go level scope this)) newargs) rest }} 137 | -- ; goArgs level scope args body = case args of 138 | -- { Nil -> body 139 | -- ; Cons this rest -> LetT (go level scope this) (goArgs (inc level) scope rest body) } 140 | ; goDefin level scope defin = case located defin of { Defin name what -> Named name (go level scope what) } 141 | ; goCase level scope loc var branches = CasT (Located loc var) (map goBranch branches) where 142 | { goBranch branch = case branch of 143 | { DefaultE rhs -> DefaultT (go level scope rhs) 144 | ; BranchE con args rhs -> let { n = length args ; level' = plus level n 145 | ; f scp nameidx = case nameidx of { Pair name j -> trieInsert name (LevL j) scp } 146 | ; scope' = foldl f scope (zip args (rangeFrom level n)) 147 | } in BranchT (Named con (lookupCon dcontable con)) n (go level' scope' rhs) } } } 148 | 149 | -------------------------------------------------------------------------------- 150 | -------------------------------------------------------------------------------- /Syntax.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Surface syntax 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module Syntax where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | 18 | import Base 19 | import Containers 20 | import Types 21 | -- import Core 22 | import PrimOps 23 | 24 | {-% include "Base.hs" %-} 25 | {-% include "Containers.hs" %-} 26 | {-% include "Types.hs" %-} 27 | -- {-% include "Core.hs" %-} 28 | {-% include "PrimOps.hs" %-} 29 | 30 | -------------------------------------------------------------------------------- 31 | -- * Surface syntax 32 | 33 | type DefinE = Defin Expr 34 | type LDefinE = LDefin Expr 35 | 36 | -- | We \"parse\" (well, recognize) type declarations, data declarations, 37 | -- type synonyms and imports, but we ignore them; this is simply so that the 38 | -- this source code can be a valid Haskell program and self-hosting at the 39 | -- same time. 40 | data TopLevel 41 | = TopDefin LDefinE 42 | | TopTyDecl Name (List Token) 43 | | TopDataDecl (List Token) 44 | | TopTyAlias (List Token) 45 | | TopImport (List Token) 46 | | TopInclude FilePath 47 | | TopModule (List Token) 48 | deriving Show 49 | 50 | filterIncludes :: List TopLevel -> List FilePath 51 | filterIncludes = go where { go ls = case ls of { Nil -> Nil ; Cons this rest -> 52 | case this of { TopInclude fn -> Cons fn (go rest) ; _ -> go rest }}} 53 | 54 | mbDefin :: TopLevel -> Maybe LDefinE 55 | mbDefin toplev = case toplev of { TopDefin def -> Just def ; _ -> Nothing } 56 | 57 | -------------------------------------------------------------------------------- 58 | 59 | type LExpr = Located Expr 60 | 61 | data Expr 62 | = VarE LName 63 | | AppE Expr Expr 64 | | LamE Name Expr 65 | | LetE (List LDefinE) Expr 66 | | RecE (List LDefinE) Expr 67 | | CaseE LExpr (List BranchE) 68 | | LitE Literal 69 | | ListE (List Expr) 70 | | PrimE PrimOp (List Expr) 71 | | StrE Int 72 | | MainE 73 | deriving Show 74 | 75 | data BranchE 76 | = BranchE Name (List Name) Expr 77 | | DefaultE Expr 78 | deriving Show 79 | 80 | -- data BranchE 81 | -- = BranchE Pattern Expr 82 | -- deriving Show 83 | 84 | isLambdaExpr :: Expr -> Bool 85 | isLambdaExpr expr = case expr of { LamE _ _ -> True ; _ -> False } 86 | 87 | lamsE :: List Name -> Expr -> Expr 88 | lamsE args body = case args of { Nil -> body ; Cons v vs -> LamE v (lamsE vs body) } 89 | 90 | appsE :: Expr -> List Expr -> Expr 91 | appsE fun args = case args of { Nil -> fun ; Cons t ts -> appsE (AppE fun t) ts } 92 | 93 | listAppsE :: List Expr -> Expr 94 | listAppsE list = case list of { Cons f args -> appsE f args ; Nil -> error "listAppsE" } 95 | 96 | -------------------------------------------------------------------------------- 97 | -- ** TODO: Pattern compiler 98 | 99 | -- SimP = simple pattern (we compile down to this) 100 | -- AppP = constructor pattern 101 | -- VarP = variable pattern 102 | -- WildP = wildcard patterns 103 | -- ListP = list pattern 104 | data Pattern 105 | = SimP Name (List Name) 106 | | AppP Name (List Pattern) 107 | | VarP Name 108 | | WildP 109 | | ListP (List Pattern) 110 | deriving Show 111 | 112 | patternHead :: Pattern -> Maybe Name 113 | patternHead pat = case pat of 114 | { SimP con _list -> Just con 115 | ; AppP con _list -> Just con 116 | ; _ -> Nothing } 117 | 118 | -- -- | We translate complex pattern into iterated matching of simple patterns 119 | -- patternCompiler :: List BranchE -> List BranchE 120 | -- patternCompiler 121 | 122 | -------------------------------------------------------------------------------- 123 | -- ** free and bound variables 124 | 125 | -- | Free variables in an expression 126 | exprFreeVars :: Expr -> TrieSet 127 | exprFreeVars expr = go trieEmpty expr where 128 | { go bound expr = case expr of 129 | { VarE lname -> let { nam = located lname } in case trieMember nam bound of { False -> trieSetSingleton nam ; _ -> trieEmpty } 130 | ; AppE e1 e2 -> trieUnion (go bound e1) (go bound e2) 131 | ; LamE v body -> go (trieSetInsert v bound) body 132 | ; LetE defs body -> goLets bound (map located defs) body 133 | ; RecE defs body -> goLetRec bound (map located defs) body 134 | ; CaseE what brs -> trieUnion (go bound (located what)) (trieUnions (map (goBranch bound) brs)) 135 | ; LitE _ -> trieEmpty 136 | ; ListE list -> trieUnions (map (go bound) list) 137 | ; PrimE p list -> trieUnions (map (go bound) list) 138 | ; StrE _ -> trieEmpty } 139 | ; goBranch bound branch = case branch of 140 | { DefaultE rhs -> go bound rhs 141 | ; BranchE _ ns rhs -> let { bound' = trieUnion (trieSetFromList ns) bound } in go bound' rhs } 142 | ; goLets bound defs body = case defs of { Nil -> go bound body ; Cons def rest -> case def of { Defin nam rhs -> 143 | let { bound' = trieSetInsert nam bound } in trieUnion (go bound rhs) (goLets bound' rest body) } } 144 | ; goLetRec bound defs body = let 145 | { bound' = trieUnion (trieSetFromList (map definedName defs)) bound 146 | ; stuff = map (\defin -> go bound' (definedWhat defin)) defs 147 | } in trieUnion (trieUnions stuff) (go bound' body) } 148 | 149 | -------------------------------------------------------------------------------- 150 | -- ** recognize PrimOps 151 | 152 | -- | From @((f x) y) z@ we create the list [f,x,y,z] 153 | recogAppsE :: Expr -> List Expr 154 | recogAppsE = compose reverse go where 155 | { go expr = case expr of { AppE f x -> Cons x (go f) ; _ -> singleton expr } } 156 | 157 | -- | temporary argument names for saturating primops. Note that all primops have at most 3 arguments 158 | tmpVars :: List Name 159 | tmpVars = ["x1","x2","x3","x4","x5"] 160 | -- tmpVars = map (\i -> append "x" (showInt i)) (rangeFrom 1 5) 161 | 162 | -- | Saturate primop application 163 | saturatePrimApp :: Location -> PrimOp -> List Expr -> Expr 164 | saturatePrimApp loc primop args = case primop of { PrimOp arity prim -> case compare nargs arity of 165 | { EQ -> PrimE primop args 166 | ; GT -> appsE (PrimE primop (take arity args)) (drop arity args) 167 | ; LT -> let { vars = take (minus arity nargs) tmpVars } 168 | in lamsE vars (PrimE primop (append args (map (\v -> VarE (Located loc v)) vars))) 169 | } } 170 | where { nargs = length args } 171 | 172 | -- | Recognize primop applications, and saturate them if necessary 173 | recogPrimApps :: Mode -> List DefinE -> List DefinE 174 | recogPrimApps mode prg = map (fmapDefin (recogPrimApps1 primops)) prg where { primops = thePrimOps mode } 175 | 176 | -- | Recognize primop applications, and saturate them if necessary 177 | recogPrimApps1 :: Trie PrimOp -> Expr -> Expr 178 | recogPrimApps1 primops = go where 179 | { goVar lname = case trieLookup (located lname) primops of 180 | { Nothing -> VarE lname 181 | ; Just primop -> saturatePrimApp (location lname) primop [] } 182 | ; go expr = case expr of 183 | { VarE lname -> goVar lname 184 | ; AppE _ _ -> case recogAppsE expr of { Cons f args -> case f of 185 | { VarE ln -> case trieLookup (located ln) primops of 186 | { Nothing -> appsE (VarE ln) (map go args) 187 | ; Just primop -> saturatePrimApp (location ln) primop (map go args) } 188 | ; _ -> appsE (go f) (map go args) } } 189 | ; LamE arg body -> LamE arg (go body) 190 | ; LetE defs body -> LetE (map goDefin defs) (go body) 191 | ; RecE defs body -> RecE (map goDefin defs) (go body) 192 | ; CaseE what brs -> CaseE (lfmap go what) (map goBranch brs) 193 | ; ListE exprs -> ListE (map go exprs) 194 | ; _ -> expr } 195 | ; goBranch branch = case branch of 196 | { BranchE con args rhs -> BranchE con args (go rhs) 197 | ; DefaultE rhs -> DefaultE (go rhs) } 198 | ; goDefin ldefin = case ldefin of { Located loc defin -> case defin of 199 | { Defin name expr -> Located loc (Defin name (go expr)) } } } 200 | 201 | -------------------------------------------------------------------------------- 202 | -- * extract string constants 203 | 204 | type StrState = Pair Int (List String) 205 | type Stringy a = State StrState a 206 | 207 | addString :: String -> Stringy Int 208 | addString what = sbind sget (\pair -> case pair of { Pair n list -> 209 | sbind (sput (Pair (inc n) (Cons what list))) (\_ -> sreturn n) }) 210 | 211 | extractStringConstants :: List LDefinE -> Pair (List String) (List LDefinE) 212 | extractStringConstants program = case runState (smapM worker program) (Pair 0 Nil) of 213 | { Pair fstate prg' -> Pair (reverse (snd fstate)) prg' } 214 | where { worker ldefin = case ldefin of { Located loc defin -> case defin of 215 | { Defin name rhs -> sfmap (\x -> Located loc (Defin name x)) (extractStringConstants1 rhs) }}} 216 | 217 | extractStringConstants1 :: Expr -> Stringy Expr 218 | extractStringConstants1 expr = go expr where 219 | { go expr = case expr of 220 | { LitE lit -> case lit of { StrL str -> sfmap StrE (addString str) ; _ -> sreturn expr } 221 | ; VarE _ -> sreturn expr 222 | ; AppE e1 e2 -> sliftA2 AppE (go e1) (go e2) 223 | ; LamE n body -> sfmap (LamE n) (go body) 224 | ; LetE defs rhs -> sliftA2 LetE (smapM goDefin defs) (go rhs) 225 | ; RecE defs rhs -> sliftA2 RecE (smapM goDefin defs) (go rhs) 226 | ; CaseE what brs -> sliftA2 CaseE (lgo what) (smapM goBranch brs) 227 | ; ListE ls -> sfmap ListE (smapM go ls) 228 | ; PrimE pri args -> sfmap (PrimE pri) (smapM go args) } 229 | ; goDefin ldefin = case ldefin of { Located loc defin -> case defin of 230 | { Defin name rhs -> sfmap (\e -> Located loc (Defin name e)) (go rhs) }} 231 | ; goBranch branch = case branch of 232 | { BranchE con args rhs -> sfmap (BranchE con args) (go rhs) 233 | ; DefaultE rhs -> sfmap (DefaultE ) (go rhs) } 234 | ; lgo lexpr = case lexpr of { Located loc expr -> sfmap (Located loc) (go expr) } } 235 | 236 | -------------------------------------------------------------------------------- 237 | 238 | 239 | -------------------------------------------------------------------------------- /Types.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Common types and functions 3 | 4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 5 | {-# LANGUAGE Strict #-} 6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 8 | 9 | module Types where 10 | 11 | -------------------------------------------------------------------------------- 12 | 13 | import Prelude ( Int , Char , Eq , Show ) 14 | import PrimGHC 15 | 16 | -------------------------------------------------------------------------------- 17 | 18 | import Base 19 | import Containers 20 | 21 | {-% include "Base.hs" %-} 22 | {-% include "Containers.hs" %-} 23 | 24 | -------------------------------------------------------------------------------- 25 | -- * Some common types 26 | 27 | -- | Names 28 | type Name = String 29 | 30 | -- | Arity 31 | type Arity = Int 32 | 33 | -- | De Bruijn level 34 | type Level = Int 35 | 36 | -- | De Bruijn index 37 | type Idx = Int 38 | 39 | -- | Constructor index 40 | type Con = Int 41 | 42 | -- | Size 43 | type Size = Int 44 | 45 | -- | Top-level index 46 | type TopIdx = Int 47 | 48 | -- | Static function index 49 | type Static = Int 50 | 51 | -- | Mapping constructor names to constructor tags 52 | type DataConTable = Trie Con 53 | 54 | -- | Are we compiling or interpreting? This is relevant with primops, 55 | -- where the two behaves differently... 56 | data Mode 57 | = Compile 58 | | Interpret 59 | deriving Show 60 | 61 | -- | Sometimes we need some fake argument for recursive definitions... (?!?) 62 | data Fake = Fake 63 | 64 | -------------------------------------------------------------------------------- 65 | -- ** Named things 66 | 67 | -- | We want to keep names for debugging \/ pretty printing 68 | data Named a = Named Name a deriving Show 69 | 70 | nfmap :: (a -> b) -> Named a -> Named b 71 | nfmap f named = case named of { Named name x -> Named name (f x) } 72 | 73 | forgetName :: Named a -> a 74 | forgetName x = case x of { Named _ y -> y } 75 | 76 | nameOf :: Named a -> String 77 | nameOf x = case x of { Named n _ -> n } 78 | 79 | -------------------------------------------------------------------------------- 80 | -- ** Definitions 81 | 82 | -- | Definitions 83 | data Defin a = Defin Name a deriving Show 84 | 85 | fmapDefin :: (a -> b) -> Defin a -> Defin b 86 | fmapDefin f defin = case defin of { Defin n x -> Defin n (f x) } 87 | 88 | definedName :: Defin a -> Name 89 | definedName defin = case defin of { Defin n _ -> n } 90 | 91 | definedWhat :: Defin a -> a 92 | definedWhat defin = case defin of { Defin _ e -> e } 93 | 94 | definToPair :: Defin a -> Pair Name a 95 | definToPair def = case def of { Defin n rhs -> Pair n rhs } 96 | 97 | definToNamed :: Defin a -> Named a 98 | definToNamed def = case def of { Defin n rhs -> Named n rhs } 99 | 100 | namedToDefin :: Named a -> Defin a 101 | namedToDefin named = case named of { Named n x -> Defin n x } 102 | 103 | ldefinToPair :: LDefin a -> Pair Name (Located a) 104 | ldefinToPair ldef = case ldef of { Located loc def -> case def of { Defin n rhs -> Pair n (Located loc rhs) } } 105 | 106 | -------------------------------------------------------------------------------- 107 | 108 | type LDefin a = Located (Defin a) 109 | 110 | fmapLDefin :: (a -> b) -> LDefin a -> LDefin b 111 | fmapLDefin f = lfmap (fmapDefin f) 112 | 113 | ldefinedName :: LDefin a -> Name 114 | ldefinedName = compose definedName located 115 | 116 | nameAndLoc :: LDefin a -> Pair Name Location 117 | nameAndLoc ldefin = case ldefin of { Located loc defin -> case defin of { Defin name _ -> Pair name loc }} 118 | 119 | showNameAndLoc :: LDefin a -> String 120 | showNameAndLoc ldefin = case nameAndLoc ldefin of { Pair name loc -> append3 (quoteString name) " at " (showLocation loc) } 121 | 122 | -------------------------------------------------------------------------------- 123 | -- * Programs 124 | 125 | -- | We partition our programs into non-recursive definitions and mutually recursive blocks 126 | data Block a 127 | = NonRecursive (LDefin a) 128 | | Recursive (List (LDefin a)) 129 | deriving Show 130 | 131 | type Program a = List (Block a) 132 | 133 | forgetBlockStructure :: Program a -> List (LDefin a) 134 | forgetBlockStructure prg = go prg where 135 | { go blocks = case blocks of { Nil -> Nil ; Cons this rest -> case this of 136 | { NonRecursive defin -> Cons defin (go rest) 137 | ; Recursive defins -> append defins (go rest) } } } 138 | 139 | -------------------------------------------------------------------------------- 140 | -- ** Literals 141 | 142 | data Literal 143 | = IntL Int 144 | | ChrL Char 145 | | StrL String 146 | deriving (Eq,Show) 147 | 148 | showLiteral :: Literal -> String 149 | showLiteral lit = case lit of 150 | { IntL n -> showInt n 151 | ; ChrL c -> showChar c 152 | ; StrL s -> doubleQuoteString s } 153 | 154 | -------------------------------------------------------------------------------- 155 | -- ** Variables 156 | 157 | -- | Variables can be a de Bruijn index, or level, or a top-level definition, or a static string index 158 | data Var 159 | = IdxV Idx 160 | | LevV Level 161 | | TopV Static 162 | | StrV Int 163 | deriving Show 164 | 165 | prettyVar :: Var -> String 166 | prettyVar var = case var of 167 | { IdxV i -> concat [ "$" , showInt i ] 168 | ; LevV j -> concat [ "#" , showInt j ] 169 | ; TopV k -> concat [ "statfun(" , showInt k , ")" ] 170 | ; StrV m -> concat [ "str<" , showInt m , ">" ]} 171 | 172 | -------------------------------------------------------------------------------- 173 | -- ** Atoms 174 | 175 | -- | Things which can be applied, case-branched, passed to primops 176 | data Atom 177 | = VarA (Named Var) 178 | | ConA (Named Con) 179 | | KstA Literal 180 | deriving Show 181 | 182 | prettyAtom :: Atom -> String 183 | prettyAtom atom = case atom of 184 | { VarA nvar -> append (nameOf nvar) (prettyVar (forgetName nvar)) 185 | ; KstA lit -> showLiteral lit 186 | ; ConA ncon -> nameOf ncon } 187 | -- ; ConA ncon -> case ncon of { Named name con -> append3 name ":" (showNat con) } 188 | 189 | -------------------------------------------------------------------------------- 190 | -- ** Source positions and locations 191 | 192 | -- | @SrcPos row col@; starting from (1,1) 193 | data SrcPos = SrcPos Int Int deriving Show 194 | 195 | startSrcPos :: SrcPos 196 | startSrcPos = SrcPos 1 1 197 | 198 | startCol :: SrcPos -> SrcPos 199 | startCol pos = case pos of { SrcPos row col -> SrcPos row 1 } 200 | 201 | nextCol :: SrcPos -> SrcPos 202 | nextCol pos = case pos of { SrcPos row col -> SrcPos row (inc col) } 203 | 204 | nextRow :: SrcPos -> SrcPos 205 | nextRow pos = case pos of { SrcPos row col -> SrcPos (inc row) 1 } 206 | 207 | nextSrcPos :: Char -> SrcPos -> SrcPos 208 | nextSrcPos ch pos 209 | = ifte (ceq ch newlineC ) (nextRow pos) 210 | ( ifte (ceq ch carriageReturnC) (startCol pos) (nextCol pos) ) 211 | 212 | showSrcPos :: SrcPos -> String 213 | showSrcPos pos = case pos of { SrcPos row col -> 214 | append ("line ") (append3 (showNat row) (", column ") (showNat col)) } 215 | 216 | showSrcPos_ :: SrcPos -> String 217 | showSrcPos_ pos = case pos of { SrcPos row col -> (append3 (showNat row) ":" (showNat col)) } 218 | 219 | showSrcPos' :: FilePath -> SrcPos -> String 220 | showSrcPos' fname pos = append3 "file " (doubleQuoteString fname) (append ", " (showSrcPos pos)) 221 | 222 | showLocation :: Location -> String 223 | showLocation loc = case loc of { Loc fname pos1 pos2 -> concat 224 | [ "file " , doubleQuoteString fname , ", " , showSrcPos_ pos1 , "--" , showSrcPos_ pos2 ] } 225 | 226 | -- | Note: For stringy code-gen, we have to escape double quotes, because the became string literals 227 | escapedShowLocation :: Location -> String 228 | escapedShowLocation loc = case loc of { Loc fname pos1 pos2 -> concat 229 | [ "file " , escapedDoubleQuoteString fname , ", " , showSrcPos_ pos1 , "--" , showSrcPos_ pos2 ] } 230 | 231 | data Location = Loc FilePath SrcPos SrcPos deriving Show 232 | data Located a = Located Location a deriving Show 233 | 234 | type LName = Located Name 235 | type LAtom = Located Atom 236 | 237 | lfmap :: (a -> b) -> Located a -> Located b 238 | lfmap f located = case located of { Located loc x -> Located loc (f x) } 239 | 240 | locFn loc = case loc of { Loc fn _ _ -> fn } 241 | locStart loc = case loc of { Loc _ pos1 _ -> pos1 } 242 | locEnd loc = case loc of { Loc _ _ pos2 -> pos2 } 243 | 244 | location lx = case lx of { Located loc _ -> loc } 245 | located lx = case lx of { Located _ x -> x } 246 | 247 | locatedStart = compose locStart location 248 | locatedEnd = compose locEnd location 249 | 250 | fakeLocation = Loc "" (SrcPos 0 0) (SrcPos 0 0) 251 | fakeLocated x = Located fakeLocation x 252 | 253 | -------------------------------------------------------------------------------- 254 | -- ** Tokens 255 | 256 | data Special 257 | = LParen | RParen | LBrace | RBrace | LBracket | RBracket | Dot 258 | | Comma | Semicolon | EqualSign | Lambda | Pipe | Arrow | DArrow | HasType 259 | deriving (Eq,Show) 260 | 261 | data Token 262 | = VarTok Name 263 | | LitTok Literal 264 | | SpecTok Special 265 | | WhiteTok 266 | deriving (Eq,Show) 267 | 268 | -- | Token wiht a location 269 | type LToken = Located Token 270 | 271 | -------------------------------------------------------------------------------- 272 | -- * matching on short lists 273 | 274 | nullary :: List a -> b -> b 275 | unary :: List a -> (a -> b) -> b 276 | binary :: List a -> (a -> a -> b) -> b 277 | ternary :: List a -> (a -> a -> a -> b) -> b 278 | 279 | nullary args f = case args of { _ -> f } 280 | unary args f = case args of { Cons x xs -> f x ; _ -> error "unary: not enough arguments" } 281 | binary args f = case args of { Cons x xs -> unary xs (f x) ; _ -> error "binary: not enough arguments" } 282 | ternary args f = case args of { Cons x xs -> binary xs (f x) ; _ -> error "ternary: not enough arguments" } 283 | 284 | -------------------------------------------------------------------------------- 285 | -------------------------------------------------------------------------------- /_main_old.hs: -------------------------------------------------------------------------------- 1 | 2 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-} 3 | {-# LANGUAGE Strict, StrictData #-} 4 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} 5 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-} 6 | 7 | module Main where 8 | 9 | -------------------------------------------------------------------------------- 10 | 11 | import Prelude ( Int , Char , Eq , Show(..) , (++) , (.) , ($) ) 12 | import qualified Prelude 13 | 14 | import qualified Control.Monad 15 | 16 | import System.Environment 17 | 18 | import PrimGHC 19 | 20 | import Nano hiding ( main ) 21 | 22 | import Base 23 | import Containers 24 | import Types 25 | import PrimOps 26 | import DataCon 27 | import Syntax 28 | import Parser 29 | import Dependency 30 | import Core 31 | import ScopeCheck 32 | import Inliner 33 | import Closure 34 | import CodeGen 35 | 36 | import qualified Text.Show.Pretty as Pretty 37 | 38 | -------------------------------------------------------------------------------- 39 | 40 | myPrettyTermDefin :: Prelude.String -> Defin Term -> Prelude.IO () 41 | myPrettyTermDefin prefix (Defin n t) = Prelude.putStrLn (Prelude.concat list) where 42 | list = [ prefix , _toGhcString n , " := " , _toGhcString (showTerm t) ] :: [Prelude.String] 43 | 44 | myPrettyTermBlock :: Block Term -> Prelude.IO () 45 | myPrettyTermBlock (NonRecursive def) = myPrettyTermDefin "- " def 46 | myPrettyTermBlock (Recursive defs ) = Control.Monad.mapM_ (myPrettyTermDefin "> ") (_toGhcList defs) 47 | 48 | 49 | main = do 50 | Prelude.putStrLn "*** main.hs" 51 | args <- System.Environment.getArgs 52 | case args of 53 | [] -> error "usage: runghc main.hs " 54 | (fn:_) -> compile fn 55 | 56 | -- type Files = List FilePath 57 | -- type Loaded = Pair Files (List TopLevel) 58 | -- 59 | -- loadAndParseMany :: Files -> List FilePath -> IO Loaded 60 | -- loadAndParseMany sofar fnames = case fnames of { Nil -> ioreturn (Pair sofar Nil) ; Cons this rest -> 61 | -- iobind (loadAndParse1 sofar this) (\loaded -> case loaded of { Pair sofar' toplevs1 -> 62 | -- iobind (loadAndParseMany sofar' rest) (\loaded -> case loaded of { Pair sofar'' toplevs2 -> 63 | -- ioreturn (Pair sofar'' (append toplevs1 toplevs2)) }) }) } 64 | -- 65 | -- loadAndParse1 :: Files -> FilePath -> IO Loaded 66 | -- loadAndParse1 sofar fname = case stringMember fname sofar of 67 | -- { True -> ioreturn (Pair sofar Nil) 68 | -- ; False -> iobind (readFile fname) (\text -> ioseq (putStrLn (append "+ " fname)) (let 69 | -- { blocks = lexer fname text 70 | -- ; toplevs = map (parseTopLevelBlock fname) blocks 71 | -- ; includes = filterIncludes toplevs 72 | -- ; sofar' = Cons fname sofar 73 | -- } in iobind (loadAndParseMany sofar' includes) (\loaded -> case loaded of { Pair sofar'' toplevs2 -> 74 | -- ioreturn (Pair sofar'' (append toplevs toplevs2)) }))) } 75 | 76 | compile fname = do 77 | let source = (_fromGhcString fname) 78 | 79 | -- text <- Prelude.readFile fname 80 | -- 81 | -- let blocks = (lexer source (_fromGhcString text)) 82 | -- Prelude.putStrLn ("number of top level blocks = " ++ show (length blocks)) 83 | -- -- Prelude.print (_toGhcList blocks) 84 | -- 85 | -- let toplevs = map (parseTopLevelBlock source) blocks 86 | ---- Prelude.putStrLn "\n----------------------------------\nSYNTAX BLOCKS" 87 | ---- Control.Monad.mapM_ Prelude.print (_toGhcList toplevs) 88 | 89 | loaded <- runIO (loadAndParse1 Nil source) 90 | let toplevs = snd loaded 91 | 92 | let defins0 = catMaybes (map mbDefin toplevs) 93 | let Pair strlits defins1 = extractStringConstants defins0 94 | Prelude.putStrLn "\n----------------------------------\nTOPLEVEL DEFINS" 95 | Control.Monad.mapM_ Prelude.print (_toGhcList (recogPrimApps defins1)) 96 | 97 | let dconTrie = collectDataCons defins1 98 | Prelude.putStrLn "\n----------------------------------\nCONSTRUCTORS" 99 | Control.Monad.mapM_ Prelude.print (_toGhcList (trieToList dconTrie)) 100 | 101 | let blocks = reorderProgram defins1 102 | Prelude.putStrLn "\n----------------------------------\nREORDERED TOPLEVEL DEFINS" 103 | Control.Monad.mapM_ Prelude.print (_toGhcList blocks) 104 | 105 | let coreprg@(CorePrg coredefs mainIdx mainTerm) = programToCoreProgram dconTrie blocks 106 | Prelude.putStrLn "\n----------------------------------\nCORE" 107 | Control.Monad.mapM_ myPrettyTermBlock (_toGhcList coredefs) 108 | Prelude.print (mainIdx,mainTerm) 109 | 110 | -- let coreprg'@(CorePrg coredefs' mainIdx' mainTerm') = inlineCorePrg 24 coreprg 111 | let coreprg'@(CorePrg coredefs' mainIdx' mainTerm') = optimizeCorePrg coreprg 112 | Prelude.putStrLn "\n----------------------------------\nOPTIMIZED CORE" 113 | Control.Monad.mapM_ myPrettyTermBlock (_toGhcList coredefs') 114 | Prelude.print (mainIdx',mainTerm') 115 | 116 | 117 | let lprogram = coreProgramToLifted coreprg' 118 | LProgram statfuns topidxs lmain = lprogram 119 | Prelude.putStrLn "\n----------------------------------\nLIFTED" 120 | Control.Monad.mapM_ Prelude.print (_toGhcList statfuns) 121 | Prelude.print lmain 122 | -- Prelude.print topidxs 123 | 124 | let code = runCodeGenM_ (liftedProgramToCode source strlits dconTrie lprogram) 125 | -- Prelude.putStrLn "\n----------------------------------\nASM" 126 | -- Control.Monad.mapM_ (Prelude.putStrLn . _toGhcString) (_toGhcList asm) 127 | Prelude.writeFile "tmp.c" (Prelude.unlines $ Prelude.map _toGhcString $ _toGhcList code) 128 | 129 | -- let val = eval Nil core 130 | -- Prelude.putStrLn "\n----------------------------------\nINTEPRETED RESULT" 131 | -- Prelude.print (showValue val) 132 | 133 | -------------------------------------------------------------------------------- /asm_shit/rts.asm: -------------------------------------------------------------------------------- 1 | 2 | ; 3 | ; RTS conventions 4 | ; =============== 5 | ; A) On the heap, we can have 1) closures/PAPs and 2) data constructors 6 | ; these are distinguished by bit #2. We reserve bits #0/#1 for GC. 7 | ; * for closures, the second word is the function pointer, and the rest 8 | ; is the environment / partially applied arguments 9 | ; * for data constructors, the tag is simply followed by the arguments. 10 | ; B) Heap pointers can be actual pointers or objects fitting in a machine word. 11 | ; Since heap pointers are aligned, we can use the low 3 bits to distinguish 12 | ; between these.: 13 | ; * low bits = 000 means an actual heap pointer 14 | ; * low bits = 001 means a 61 bit literal (int or char). 15 | ; * low bits = 010 means a static function pointer 16 | ; * low bits = 100 means a nullary data constructor 17 | ; This means that static functions must be aligned too 18 | ; C) We have a custom stack to store the enviroment. The stack pointer is RBP 19 | ; Arguments are always passed on this stack 20 | ; 21 | 22 | bits 64 23 | global start 24 | 25 | section .bss 26 | 27 | align 8 28 | heap: resb (1024*1024*128) ; 128 mb 29 | stack: resq (1024*1024) ; 8 mb 30 | 31 | section .data 32 | 33 | errmsg_app: db "runtime error: invalid application",10 34 | .len: equ $ - errmsg_app 35 | 36 | section .text 37 | 38 | %define CON_False 0 39 | %define CON_True 1 40 | %define CON_Unit 2 41 | %define CON_Nil 3 42 | %define CON_Cons 4 43 | %define CON_Nothing 5 44 | %define CON_Just 6 45 | 46 | %define HEAP_PTR r15 47 | %define STACK_PTR rbp 48 | %define STACK_PTR_COPY r14 49 | %define ARG0 rax 50 | %define ARG1 rcx 51 | %define ARG2 rdx 52 | 53 | %define DE_BRUIJN(idx) qword [ STACK_PTR - 8*(idx+1) ] 54 | %define DE_BRUIJN_COPY(idx) qword [ STACK_PTR_COPY - 8*(idx+1) ] 55 | %macro PUSH 1 56 | mov [STACK_PTR],%1 57 | add STACK_PTR,8 58 | %endmacro 59 | %macro POP 1 60 | sub STACK_PTR,8 61 | mov %1,[STACK_PTR] 62 | %endmacro 63 | 64 | %define INT_LITERAL(i) (i<<3) + 1 65 | %define CHR_LITERAL(c) INT_LITERAL(c) 66 | %define NULLARY_CON(con) (con<<3) + 4 67 | 68 | %define IS_HEAP_PTR test al,7 69 | 70 | %define FROM_INT_LITERAL sar rax,3 71 | %define FROM_INT_LITERAL_ARG0 sar ARG0,3 72 | %define FROM_INT_LITERAL_ARG1 sar ARG1,3 73 | %macro FROM_INT_LITERAL_ARGS01 0 74 | FROM_INT_LITERAL_ARG0 75 | FROM_INT_LITERAL_ARG1 76 | %endmacro 77 | %macro TO_INT_LITERAL 0 78 | shl rax,3 79 | or al,1 80 | %endmacro 81 | 82 | %define FROM_BOOL_CON shr rax,3 83 | %define FROM_BOOL_CON_ARG0 shr ARG0,3 84 | %define FROM_BOOL_CON_ARG1 shr ARG1,3 85 | %macro FROM_BOOL_CON_ARGS01 0 86 | FROM_BOOL_CON_ARG0 87 | FROM_BOOL_CON_ARG1 88 | %endmacro 89 | %macro TO_BOOL_CON 0 90 | shl rax,3 91 | or al,2 92 | %endmacro 93 | 94 | %define FROM_STATIC_FUNPTR and al,0xf8 95 | %define TO_STATIC_FUNPTR or al,2 96 | 97 | %define TAG_DATACON(con_tag ,con_arity) (con_arity<<16) + (con_tag << 3) + 4 98 | %define TAG_CLOSURE(env_size,rem_arity) (env_size <<16) + (rem_arity << 3) + 0 99 | 100 | ; args: size in words 101 | %macro ALLOCATE_HEAP 1 102 | mov rax,HEAP_PTR 103 | add HEAP_PTR , (8 * %1) 104 | %endmacro 105 | 106 | %macro DYN_ALLOCATE_HEAP 1 107 | mov rax,%1 108 | shl rax,3 109 | add rax,HEAP_PTR 110 | xchg rax,HEAP_PTR 111 | %endmacro 112 | 113 | ; args: static label, env size, rem_arity 114 | %macro ALLOCATE_CLOSURE 3 115 | ALLOCATE_HEAP (%2 + 2) 116 | mov rdx,TAG_CLOSURE(%2,%3) 117 | mov [rax],rdx 118 | mov rdx,%1 119 | mov [rax+8],rdx 120 | %endmacro 121 | 122 | %macro DYN_ALLOCATE_CLOSURE 3 123 | mov rax,%1 124 | add rax,2 125 | shl rax,3 126 | add rax,HEAP_PTR 127 | xchg rax,HEAP_PTR 128 | mov rdx,%2 ; env size 129 | shl rdx,13 130 | add rdx,%3 ; rem arity 131 | shl rdx,3 132 | mov [rax],rdx 133 | mov rdx,%1 134 | mov [rax+8],rdx 135 | %endmacro 136 | 137 | ; args: con_tag, con_arity 138 | %macro ALLOCATE_DATACON 2 139 | ALLOCATE_HEAP (%2 + 1) 140 | mov rdx,TAG_DATACON(%1,%2) 141 | mov [rax],rdx 142 | %endmacro 143 | 144 | ; args: con_tag, con_arity 145 | %macro DYN_ALLOCATE_DATACON 2 146 | mov rax,%1 147 | inc rax 148 | shl rax,3 149 | add rax,HEAP_PTR 150 | xchg rax,HEAP_PTR 151 | mov rdx,%2 ; con arity 152 | shl rdx,13 153 | add rdx,%1 ; con tag 154 | shl rdx,3 155 | mov [rax],rdx 156 | %endmacro 157 | 158 | ;------------------------------------------------------------------------------- 159 | 160 | . "; evaluate a thunk on RAX" 161 | rts_eval_thunk: 162 | IS_HEAP_PTR 163 | jne .singleword 164 | mov rdx,[rax] 165 | test dx,dx ; thunk = low 3 bits is zero and the next 13 bits too 166 | jnz .evaluated 167 | .thunk: 168 | push rsi 169 | push rdi 170 | mov rsi,rax 171 | mov rcx,[rsi] 172 | shr rcx,16 ; size of the environment 173 | push rcx 174 | lea rdi,[rsi+16] 175 | .push_loop: 176 | mov rax,[rdi] 177 | PUSH rax 178 | add rdi,8 179 | loop .push_loop 180 | call [rsi+8] 181 | pop rcx 182 | sub STACK_PTR,rcx 183 | pop rdi 184 | pop rsi 185 | retn 186 | .singleword: 187 | mov dl,al 188 | and dl,7 189 | cmp dl,2 190 | je .static_funptr 191 | .evaluated: 192 | retn 193 | .static_funptr: 194 | mov rbx,rax 195 | and bl,0xf8 196 | mov rcx,[rbx-8] ; env size / arity 197 | or rcx,rcx ; both is zero? 198 | jnz .evaluated 199 | call rbx ; example: CAF like a string. No environment, no arguments 200 | retn 201 | 202 | ;--------------------------------------- 203 | 204 | ; apply to an unknown function. RAX = heap ptr to function 205 | ; RCX = number of arguments on the stack (assumed to be positive!) 206 | rts_apply: 207 | IS_HEAP_PTR 208 | jne .single_word ; either a literal or a static function ptr 209 | mov rdx,[rax] ; tag word 210 | test dl,7 211 | jz .closure 212 | 213 | .single_word: 214 | mov dl,al 215 | and dl,7 216 | cmp dl,2 217 | jne .app_to_nonlambda 218 | .static_funptr: 219 | ; TODO: maybe jump to somewhere in the middle of .closure to solve this? 220 | 221 | ; fatal error: application to non-lambda and non-constructor 222 | .app_to_nonlambda: 223 | mov rsi,errmsg_app 224 | mov rdx,errmsg_app.len 225 | call rts_local_stderr_string 226 | mov rax,666 227 | jmp rts_exit 228 | 229 | .datacon: 230 | mov rsi,rax 231 | mov r8,rdx 232 | mov r9,rdx 233 | shr r8,16 234 | and r8,0xffff ; r8 = con tag 235 | shr r9,3 236 | and r9,0x1fff ; r9 = old arity 237 | mov r10,r9 238 | add r10,rcx ; r10 = new arity 239 | DYN_ALLOCATE_DATACON r8,r10 240 | mov rdi,rax 241 | push rcx 242 | mov rcx,r9 243 | mov rbx,8 244 | .copy_old_args: 245 | mov rax,[rsi+rbx] 246 | mov [rdi+rbx],rax 247 | add rbx,8 248 | .loop copy_old_args 249 | pop rcx 250 | .copy_new_args: 251 | mov rax,[STACK_PTR-8*rcx+8] 252 | mov [rdi+rbx],rax 253 | add rbx,8 254 | .loop copy_new_args 255 | mov rax,rdi 256 | retn 257 | 258 | .closure: 259 | mov r8,rdx 260 | mov r9,rdx 261 | shr r8,16 262 | and r8,0xffff ; r8 = env size 263 | shr r9,3 264 | and r9,0x1fff ; r9 = arity 265 | cmp rcx,r9 ; actual args vs arity 266 | je .equal 267 | jl .less 268 | jmp .greater 269 | 270 | ; TODO: push the environment then push the arguments then call (callee frees) 271 | .equal: 272 | push rax 273 | or r8,r8 274 | jz .do_not_insert_env 275 | mov rsi,STACK_PTR 276 | lea rdi,[STACK_PTR + r8 * 8] 277 | .move_args_loop: 278 | sub rsi,8 279 | sub rdi,8 280 | mov rax,[rsi] 281 | mov [rdi],rax 282 | loop .move_args_loop: 283 | mov rcx,r8 284 | mov rdi,rsi 285 | pop rsi 286 | push rsi 287 | add rsi,8 288 | rep movsq 289 | .do_not_insert_env: 290 | pop rax 291 | jmp rax 292 | 293 | ; TODO: allocate closure, copy the environment then copy the arguments 294 | .less: 295 | 296 | ; TODO: push the environment then push the necessary arguments then call; 297 | ; then recursively call rts_apply for the remaining args 298 | .greater: 299 | 300 | 301 | -------------------------------------------------------------------------------- /asm_shit/sys_macos.asm: -------------------------------------------------------------------------------- 1 | 2 | ; MacOS syscalls 3 | 4 | rts_exit: 5 | FROM_INT_LITERAL 6 | mov rdi,rax 7 | mov rax, 0x2000001 ; exit 8 | syscall 9 | retn 10 | 11 | rts_putchar_stderr: 12 | mov rdi,2 ; stderr = 2 13 | jmp rts_local_hputchar_literal 14 | 15 | rts_putchar: 16 | mov rdi,1 ; stdout = 1 17 | ; jmp rts_local_hputchar_literal 18 | 19 | rts_local_hputchar_literal: 20 | FROM_INT_LITERAL ; FROM_CHR_LITERAL 21 | rts_local_hputchar: 22 | push rax 23 | mov rsi,rsp 24 | mov rdx,1 ; length = 1 character 25 | mov rax,0x2000004 ; write 26 | syscall 27 | pop rax 28 | mov rax,NULLARY_CON(CON_Unit) ; Unit 29 | retn 30 | 31 | rts_getchar: 32 | mov rdi,0 ; stdin = 0 33 | call rts_local_hgetchar 34 | test rax,rax 35 | js .error 36 | TO_INT_LITERAL ; TO_CHR_LITERAL 37 | mov rdx,rax 38 | ALLOCATE_DATACON CON_Just,1 ; Just 39 | mov [rax+8],rdx 40 | retn 41 | .error: 42 | mov rax,NULLARY_CON(CON_Nothing) ; Nothing 43 | retn 44 | 45 | ; returns a character code or -1 if error 46 | rts_local_hgetchar: 47 | push rax ; alloca 8 bytes 48 | mov rsi,rsp ; buffer 49 | mov rdx,1 ; length = 1 character 50 | mov rax,0x2000003 ; read 51 | syscall 52 | or rax,rax ; bytes read (0 or 1) 53 | jnz .succeed 54 | pop rdx 55 | xor rax,rax 56 | dec rax ; -1 = error 57 | retn 58 | .succeed: 59 | pop rax 60 | retn 61 | 62 | ; rsi = string ; rdx = length of the string 63 | rts_local_stderr_string: 64 | mov rdi,2 ; stderr = 2 65 | mov rax,0x2000004 ; write 66 | syscall 67 | retn 68 | 69 | rts_error: 70 | IS_HEAP_PTR 71 | jnz .exit 72 | cmp rax,TAG_DATACON(CON_Cons,2) ; Cons cell 73 | jnz .exit 74 | push rax 75 | mov rax,rax[8] 76 | call rts_putchar_stderr 77 | pop rax 78 | mov rax,rax[16] 79 | jmp rts_error 80 | .exit: 81 | mov rdi,666 ; exit code 82 | mov rax, 0x2000001 ; exit 83 | syscall 84 | retn 85 | -------------------------------------------------------------------------------- /bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm nanohs_via_ghc.exe 2>/dev/null 4 | rm nanohs_via_ghc_00.exe 2>/dev/null 5 | rm nanohs_via_ghc_01.exe 2>/dev/null 6 | rm nanohs_stage1.c 2>/dev/null 7 | rm nanohs_stage2.c 2>/dev/null 8 | rm nanohs_stage3.c 2>/dev/null 9 | rm nanohs_stage1.exe 2>/dev/null 10 | rm nanohs_stage2.exe 2>/dev/null 11 | 12 | echo "" ; echo "===================" 13 | echo "compiling a bootstrap (stage #0) compiler via GHC" 14 | ghc -O0 --make -main-is Nano.main Nano.hs -o nanohs_via_ghc_O0.exe 15 | #ghc -O1 --make -main-is Nano.main Nano.hs -o nanohs_via_ghc_O1.exe 16 | 17 | rm *.o 18 | rm *.hi 19 | 20 | echo "" ; echo "===================" 21 | echo "compiling a stage #1 (unoptimized) compiler via the bootstrapped one (stage #0)" 22 | ./nanohs_via_ghc_O0.exe -c Nano.hs nanohs_stage1.c 23 | echo "running gcc on stage #1 C source..." 24 | gcc -O3 -Wl,-stack_size -Wl,0x3000000 nanohs_stage1.c -o nanohs_stage1.exe 25 | 26 | echo "" ; echo "===================" 27 | echo "compiling a stage #2 (optimized) compiler via stage #1" 28 | ./nanohs_stage1.exe -o Nano.hs nanohs_stage2.c 29 | echo "running gcc on stage #2 C source..." 30 | gcc -O3 -Wl,-stack_size -Wl,0x3000000 nanohs_stage2.c -o nanohs_stage2.exe 31 | 32 | echo "" ; echo "===================" 33 | echo "compiling a stage #3 (optimized) compiler via stage #2" 34 | ./nanohs_stage2.exe -o Nano.hs nanohs_stage3.c 35 | 36 | echo "" ; echo "===================" 37 | echo "comparing the stage #2 and stage #3 outputs:" 38 | DIFF=`diff -q nanohs_stage2.c nanohs_stage3.c` 39 | if [[ ! -z "$DIFF" ]] 40 | then 41 | echo $DIFF 42 | else 43 | echo OK. 44 | fi 45 | -------------------------------------------------------------------------------- /examples/church.nano: -------------------------------------------------------------------------------- 1 | 2 | {-# LANGUAGE NoImplicitPrelude, Rank2Types, TypeApplications #-} 3 | 4 | import PrimGHC 5 | import Prelude ( Int , undefined ) 6 | import Base 7 | 8 | {-% include "Base.hs" %-} 9 | 10 | main = runIO action 11 | 12 | action = iosequence_ 13 | [ putStrLn "church number example" 14 | , print (natToInt (pow ten (intToNat 4))) 15 | ] 16 | 17 | data Nat = Nat (forall a. a -> (a -> a) -> a) 18 | 19 | elim :: Nat -> a -> (a -> a) -> a 20 | elim nat z s = case nat of { Nat n -> n z s } 21 | 22 | natToInt :: Nat -> Int 23 | natToInt n = elim n 0 inc 24 | 25 | intToNat :: Int -> Nat 26 | intToNat n = ifte (eq n 0) zero (succ (intToNat (dec n))) 27 | 28 | zero :: Nat 29 | zero = Nat (\z s -> z) 30 | 31 | succ :: Nat -> Nat 32 | succ n = Nat (\z s -> s (elim n z s)) 33 | 34 | one = Nat (\z s -> s (elim zero z s)) 35 | two = Nat (\z s -> s (elim one z s)) 36 | three = Nat (\z s -> s (elim two z s)) 37 | four = Nat (\z s -> s (elim three z s)) 38 | five = Nat (\z s -> s (elim four z s)) 39 | six = Nat (\z s -> s (elim five z s)) 40 | 41 | ten = add five five 42 | hundred = mul ten ten 43 | thousand = mul hundred ten 44 | 45 | add :: Nat -> Nat -> Nat 46 | add n m = Nat (\z s -> elim m (elim n z s) s) 47 | 48 | mul :: Nat -> Nat -> Nat 49 | mul n m = elim n zero (add m) 50 | 51 | pow :: Nat -> Nat -> Nat 52 | pow n e = elim e one (mul n) 53 | 54 | -------------------------------------------------------------------------------- /examples/test.nano: -------------------------------------------------------------------------------- 1 | 2 | -- fun1 x y = plus x y 3 | -- fun2 a b = minus a b 4 | 5 | inc x = plus x 1 6 | dec x = minus x 1 7 | 8 | neq x y = not (eq x y) 9 | 10 | gt x y = lt y x 11 | 12 | ge x y = le y x 13 | 14 | compare x y = ifte (lt x y) LT (ifte (eq x y) EQ GT) 15 | 16 | foldl f x0 list = go x0 list where 17 | { go x ls = case ls of { Nil -> x ; Cons y ys -> go (f x y) ys } 18 | } 19 | 20 | sum = foldl plus 0 21 | 22 | range = rangeFrom 0 23 | rangeFrom k n = ifte (gt n 0) (Cons k (rangeFrom (inc k) (dec n))) Nil 24 | 25 | range' = rangeFrom' 0 26 | rangeFrom' = worker where { 27 | worker k n = ifte (gt n 0) (Cons k (worker (inc k) (dec n))) Nil } 28 | 29 | reverse = foldl (\xs x -> Cons x xs) Nil 30 | 31 | showNat n = ifte (lt n 0) (error "showNat: negative") (worker n) where 32 | { worker n = ifte (eq n 0) "0" (reverse (go n)) 33 | ; go n = ifte (eq n 0) Nil (Cons (chr (plus (mod n 10) 48)) (go (div n 10))) 34 | } 35 | 36 | main = Pair 37 | (showNat (sum (range' 101))) 38 | (sum (rangeFrom 100 7)) 39 | 40 | -------------------------------------------------------------------------------- /examples/test5.nano: -------------------------------------------------------------------------------- 1 | 2 | {-% include "Base.hs" %-} 3 | 4 | main = runIO (ioseq action6 (ioseq sep (ioseq action7 action_debug))) 5 | 6 | action_debug = iosequence_ 7 | [ sep 8 | , putStrLn "some construcor names:" 9 | , print (Triple 101 202 303) 10 | , print (SomeConstructor "hello" 666) 11 | ] 12 | 13 | -- big_action = putStrLn (showInt (length (big2 10 10 100))) 14 | 15 | big0 n = concat (replicate n "0123456789" ) 16 | big1 m n = concat (replicate m (big0 n) ) 17 | big2 k m n = concat (replicate k (big1 m n)) 18 | 19 | sep = putStrLn (replicate 20 '=') 20 | 21 | action7 = iobind (readFile "a.txt") (\text -> ioseq 22 | (putStrLn "contents of the file `a.txt`:") (putStrLn (unlines (map ff (lines text))))) 23 | 24 | myPrintJust mb = case mb of { Just n -> putStrLn (append "fuck -> " (showInt n)) } 25 | 26 | text = unlines [ "alma" , "korte" , "szilva" ] 27 | text2 = unlines (map ff (lines text)) 28 | ff x = append ">>> " x 29 | 30 | action6 = iobind getArgs (\list -> iosequence_ 31 | [ putStrLn (append "argc = " (showInt (length list))) 32 | , iomapM_ putStrLn list 33 | ]) 34 | 35 | -------------------------------------------------------------------------------- /main.hs: -------------------------------------------------------------------------------- 1 | 2 | module Main where 3 | 4 | import qualified Nano 5 | 6 | main = Nano.main 7 | 8 | -------------------------------------------------------------------------------- /nanohs.cabal: -------------------------------------------------------------------------------- 1 | Cabal-Version: 2.4 2 | Name: nanohs 3 | Version: 0.5 4 | Synopsis: nanohs - a self-hosting lambda calculus compiler 5 | Description: A self-hosting lambda calculus compiler. 6 | See the README fore more information. 7 | 8 | License: BSD-3-Clause 9 | Author: Balazs Komuves 10 | Copyright: (c) 2021 Balazs Komuves 11 | Maintainer: bkomuves+hackage@gmail.com 12 | Homepage: https://github.com/bkomuves/nanohs 13 | Stability: Experimental 14 | Category: Language 15 | Tested-With: GHC == 8.6.5 16 | Build-Type: Simple 17 | 18 | Source-repository head 19 | type: git 20 | location: https://github.com/bkomuves/nanohs 21 | 22 | -------------------------------------------------------------------------------- 23 | 24 | library 25 | 26 | Build-Depends: base >= 4 && < 5 27 | Hs-Source-Dirs: . 28 | 29 | exposed-modules: Base 30 | Closure 31 | CodeGen 32 | Containers 33 | Core 34 | DataCon 35 | Dependency 36 | Eval 37 | Inliner 38 | Nano 39 | Parser 40 | PrimGHC 41 | PrimOps 42 | ScopeCheck 43 | Syntax 44 | Types 45 | 46 | Default-Language: Haskell2010 47 | Default-extensions: Strict, NoImplicitPrelude, FlexibleInstances, FlexibleContexts 48 | Other-extensions: CPP 49 | 50 | -------------------------------------------------------------------------------- 51 | 52 | executable nanohs_bootstrap.exe 53 | 54 | Build-Depends: base >= 4 && < 5, nanohs >= 0.5 55 | main-is: main.hs 56 | Default-Language: Haskell2010 57 | Hs-Source-Dirs: . 58 | 59 | -------------------------------------------------------------------------------- 60 | -------------------------------------------------------------------------------- /slides/.gitignore: -------------------------------------------------------------------------------- 1 | *.aux 2 | *.log 3 | *.nav 4 | *.out 5 | *.snm 6 | *.toc 7 | *.vrb 8 | -------------------------------------------------------------------------------- /slides/nanohs_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bkomuves/nanohs/dd052656ba72934e3be13f9968ef280565e7b03c/slides/nanohs_slides.pdf -------------------------------------------------------------------------------- /slides/nanohs_slides.tex: -------------------------------------------------------------------------------- 1 | 2 | \documentclass{beamer} 3 | \mode 4 | \usepackage{color} 5 | \usepackage{fancyvrb} 6 | \usefonttheme[onlymath]{serif} 7 | 8 | \title{Writing a self-hosting compiler \\ for a purely functional language\\ 9 | {\footnotesize (work in progress)}} 10 | \author{Bal\'azs K\H{o}m\H{u}ves \\ {\small [{\tt bkomuves@gmail.com}]}} 11 | \date{2021.03.24.} 12 | 13 | %------------------------------------------------------------------------------% 14 | 15 | \begin{document} 16 | 17 | \begin{frame} 18 | \titlepage 19 | \end{frame} 20 | 21 | %------------------------------------------------------------------------------% 22 | 23 | \begin{frame}{What is this?} 24 | 25 | This is an {\bf experiment} to write a minimalistic self-hosting compiler 26 | in a purely functional language. It's intended both as a learning 27 | experience (for myself) and an experiment in bootstrapping.\\[15pt] 28 | 29 | Goals: 30 | \begin{itemize} 31 | \item self-hosting compiler 32 | \item very small code-base 33 | \item bootstrappable using a Haskell implementation 34 | \item generated code should have ``acceptable'' quality \\[15pt] 35 | \end{itemize} 36 | 37 | Non-goals: 38 | \begin{itemize} 39 | \item a fully-featured language 40 | \item efficient compilation (the compiler can be slow) 41 | \item bootstrapping ``real'' compilers from this code base 42 | \end{itemize} 43 | 44 | \end{frame} 45 | 46 | %------------------------------------------------------------------------------% 47 | 48 | \begin{frame}{Why self-host?} 49 | 50 | Why do bootstrapping / self-hosting?\\[10pt] 51 | 52 | \begin{itemize} 53 | \item I like simple, self-contained stuff 54 | \item self-hosting is a philosophically desirable property 55 | \item future-proofness: If you can bootstrap from something simple, it won't bit-rot that easily 56 | \item it's a test of the maturity of the compiler 57 | \item the compiler itself is a very good test program! \\[15pt] 58 | \end{itemize} 59 | 60 | Exhibit A: Maru, a tiny self-hosting LISP dialect was successfully revived and 61 | extended by Attila.\\[15pt] 62 | 63 | Exhibit B: The York Haskell Compiler (forked from {\tt nhc98}) is now dead, apparently 64 | because \emph{nobody knows how to build it anymore}. 65 | 66 | \end{frame} 67 | 68 | %------------------------------------------------------------------------------% 69 | 70 | \begin{frame}{The language} 71 | 72 | The syntax and semantics is based on (strict) Haskell - the idea is that the compiler 73 | should be compilable by both GHC and itself. 74 | 75 | This makes development much easier!\\[20pt] 76 | 77 | The language is basically untyped call-by-value lambda calculus with data constructors 78 | and recursive lets. 79 | During parsing, Haskell features like type annotations, data type declarations, imports etc 80 | are simply ignored - but the compiler itself is a well-typed program!\\[20pt] 81 | 82 | In fact, a type checker should be probably added, after all; but I wanted to keep it simple. 83 | 84 | \end{frame} 85 | 86 | %------------------------------------------------------------------------------% 87 | 88 | \begin{frame}{Language (non-)features} 89 | 90 | {\small 91 | \begin{itemize} 92 | \item no static type system (untyped lambda calculus) 93 | \item na data type declarations (constructors are arbitrary capitalized names) 94 | \item no module system - instead, we have C-style includes 95 | \item strict language (exceptions: if-then-else, logical {\tt and}/{\tt or}) 96 | \item ML-style side effects, but wrapped in a monad 97 | \item only simple pattern matching + default branch (TODO: nested patterns) 98 | \item no infix operators 99 | \item list construction syntax {\tt [a,b,c]} is supported 100 | \item no indentation syntax (only curly braces), except for top-level blocks 101 | \item only line comments, starting at the beginning of the line 102 | \item no escaping in character / string constants yet 103 | \item (universal polymorphic equality comparison primop) 104 | \end{itemize} 105 | } 106 | 107 | \end{frame} 108 | 109 | %------------------------------------------------------------------------------% 110 | 111 | \begin{frame}{Compilation pipeline} 112 | 113 | \begin{enumerate} 114 | \item lexing and parsing 115 | \item collecting string literals 116 | \item partition recursive lets into strongly connected components 117 | \item TODO: eliminate pattern matching into simple branching on constructors 118 | \item recognize primops 119 | \item collect data constructors 120 | \item scope checking \& conversion to core language 121 | \item inline small functions + some basic optimizations 122 | \item closure conversion 123 | \item TODO: compile to a low-level intermediate language 124 | \item final code generation 125 | \end{enumerate} 126 | 127 | \end{frame} 128 | 129 | %------------------------------------------------------------------------------% 130 | 131 | \begin{frame}{The runtime system} 132 | 133 | The runtime system is relatively straightforward: 134 | 135 | {\small 136 | \begin{itemize} 137 | \item there is a heap, which can only contain closures and data constructors 138 | \item there is stack (separate from the C stack) which contains pointers to the heap 139 | \item heap pointers are tagged using the lowest 3 bits 140 | \item heap objects fitting into 61 bits (ints, chars, nullary constructors, static functions) are 141 | not allocated, just stored in registers / on stack 142 | \item statically unknown application is implemented as a runtime primitive 143 | \item there is a simple copying GC - the GC roots are simply the content of the stack 144 | (plus statically known data) 145 | \item there are some debug features like printing heap objects 146 | \item primops: integer arithmetic; integer comparison; bitwise operations; lazy {\tt ifte/and/or}; basic IO. 147 | \end{itemize} 148 | } 149 | 150 | \end{frame} 151 | 152 | %------------------------------------------------------------------------------% 153 | 154 | \begin{frame}{Current state of the experiment} 155 | 156 | Repo: \url{https://github.com/bkomuves/nanohs}\\[15pt] 157 | 158 | Current state: 159 | \begin{itemize} 160 | \item it can compile itself successfully; 161 | \item the source code is not as nice as it could be; 162 | \item the generated code is pretty horrible; 163 | \item there are some desirable features missing.\\[15pt] 164 | \end{itemize} 165 | 166 | Code size: 167 | \begin{itemize} 168 | \item the compiler: $\sim 1900$ lines (required code) 169 | \item type annotations: $+525$ lines (ignored) 170 | \item the C runtime: $\sim 600$ lines (including some debug features) 171 | \end{itemize} 172 | 173 | \end{frame} 174 | 175 | %------------------------------------------------------------------------------% 176 | 177 | \begin{frame}{Mistakes I made} 178 | 179 | {\small 180 | I made a huge amount of mistakes during the development, which made it much 181 | harder and longer than I expected. 182 | 183 | \begin{itemize} 184 | \item Trying to self-host before the compiler actually works. Writing a 185 | compiler is hard work, you don't want to do it in a limited language! 186 | \item Not figuring out the precise semantics upfront: 187 | \begin{itemize} 188 | \item exact details of lazyness vs. strictness 189 | \item controlling exactly where heap / stack allocations can happen 190 | \item recursive lets 191 | \end{itemize} 192 | \item Not having debugging support from early on (for example: good error messages, 193 | threading names \& source locations, RTS features, pretty printing AST-s) 194 | \item Trying to target assembly first, instead of a simpler target like C; it's just extra cognitive load 195 | \item Generating code directly, without having a low-level IL 196 | \item Using de Bruijn indices (as opposed to levels) in the generated code 197 | \end{itemize} 198 | } 199 | 200 | \end{frame} 201 | 202 | %------------------------------------------------------------------------------% 203 | 204 | \begin{frame}{Bug gallery} 205 | 206 | {\small 207 | Some examples of bugs which were really painful to debug: 208 | 209 | \begin{itemize} 210 | \item IO ordering bugs (GHC \emph{really} likes to optimize my IO implementations away...) 211 | \item GC liveness bugs (see the next slide) 212 | \item stack management, and using de Bruijn indices in the generated code 213 | \item C stack overflow gives you a segfault with no information 214 | \item thunks (there shouldn't be any, this is supposed to be a strict language after all...) 215 | \item recursive lets are a PITA if you allow non-lambdas 216 | \item there was a very simple bug (wrong variable name) in the implementation of generic 217 | application in the RTS which was not triggered by compiling the whole compiler, only 218 | after I did inlining 219 | \item inlining code which does computation makes the runtime blow up exponentially, 220 | which looks \emph{very} similar to an infinite loop... 221 | \end{itemize} 222 | } 223 | 224 | \end{frame} 225 | 226 | %------------------------------------------------------------------------------% 227 | 228 | \begin{frame}[fragile]{Example GC bug - Marshalling from C strings} 229 | 230 | {\footnotesize 231 | \begin{verbatim} 232 | heap_obj marshal_from_cstring(char *cstring) { ... 233 | 234 | v1: obj = heap_allocate(Cons,2); 235 | obj[1] = cstring[0]; 236 | obj[2] = marshal_from_cstring(cstring+1); 237 | return obj; 238 | 239 | v2: rest = marshal_from_cstring(cstring+1); 240 | obj = heap_allocate(Cons,2); 241 | obj[1] = cstring[0]; 242 | obj[2] = rest; 243 | return obj; 244 | 245 | v3: rest = marshal_from_cstring(cstring+1); 246 | push(rest); 247 | obj = heap_allocate(Cons,2); 248 | obj[1] = cstring[0]; 249 | obj[2] = pop(); 250 | return obj; 251 | \end{verbatim} 252 | } 253 | 254 | \end{frame} 255 | 256 | %------------------------------------------------------------------------------% 257 | 258 | \begin{frame}{Future work} 259 | 260 | I'm not convinced that it is worth hacking on this toy compiler (as opposed to work 261 | on a full-featured ``real'' compiler), but in any case, here is a TODO list:\\[10pt] 262 | 263 | \begin{itemize} 264 | \item add nested patterns, and patterns in lambda arguments 265 | \item do some optimizations, so that the generated code is better 266 | \item tail call elimination 267 | \item escaping in string constants 268 | \item compile to a low-level intermediate language first, and do codegen from that 269 | \item add a type system 270 | \item more backends: x86-64 assembly and/or machine code; some simple VM 271 | \end{itemize} 272 | 273 | \end{frame} 274 | 275 | 276 | %------------------------------------------------------------------------------% 277 | 278 | \end{document} 279 | -------------------------------------------------------------------------------- /sloc_count.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SRC="Base.hs Closure.hs CodeGen.hs Containers.hs Core.hs DataCon.hs Dependency.hs Eval.hs Inliner.hs Nano.hs Parser.hs PrimOps.hs ScopeCheck.hs Syntax.hs Types.hs" 4 | 5 | FULL=`cat $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'` 6 | ANNT=`grep "::" $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'` 7 | TYPE=`grep "^type " $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'` 8 | DATA=`grep "^data " $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'` 9 | DCON=`grep "^ |" $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'` 10 | DERV=`grep "^ deriving" $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'` 11 | 12 | HASKELL_CLOC=`cloc --quiet --csv $SRC | grep Haskell` 13 | ASM_CLOC=`cloc --quiet --csv rts.asm sys_macos.asm | grep Assembly` 14 | C_CLOC=`cloc --quiet --csv rts.c | grep C` 15 | 16 | IFS=',' #setting comma as delimiter 17 | read -a arr <<< "$HASKELL_CLOC" 18 | BLNK=${arr[2]} 19 | CMMT=${arr[3]} 20 | HSLN=${arr[4]} 21 | 22 | read -a arr <<< "$ASM_CLOC" 23 | ASML=${arr[4]} 24 | 25 | read -a arr <<< "$C_CLOC" 26 | CRTS=${arr[4]} 27 | 28 | echo "raw lines = $FULL" 29 | echo "blank = $BLNK" 30 | echo "comment = $CMMT" 31 | echo "haskell lines = $HSLN" 32 | echo "type annotations = $ANNT" 33 | echo "type aliases = $TYPE" 34 | echo "data declarations = $DATA" 35 | echo "data constructors = $DCON" 36 | echo "deriving = $DERV" 37 | 38 | IGNORED=`expr $ANNT + $TYPE + $DATA + $DCON + $DERV` 39 | ESSENTIAL=`expr $HSLN - $IGNORED` 40 | 41 | echo "============================" 42 | echo "haskell lines = $HSLN" 43 | echo "from this ignored = $IGNORED" 44 | echo "essential lines = $ESSENTIAL" 45 | echo "C runtime = $CRTS" 46 | echo "assembly runtime = $ASML" 47 | -------------------------------------------------------------------------------- /topsort/TopSortSCC.hs: -------------------------------------------------------------------------------- 1 | 2 | {-# LANGUAGE Strict, FlexibleContexts, PatternSynonyms #-} 3 | module TopSortSCC where 4 | 5 | import Control.Monad 6 | import Control.Monad.State.Strict 7 | 8 | import qualified Data.Map.Strict as Map ; import Data.Map.Strict (Map) 9 | import qualified Data.Set as Set ; import Data.Set (Set) 10 | 11 | import Data.List 12 | import System.Random 13 | 14 | -------------------------------------------------------------------------------- 15 | 16 | ex = Map.fromList exlist 17 | 18 | exlist = 19 | [ ( "a" , [ "a" , "b" ] ) 20 | , ( "b" , [ "c" , "d" , "e" , "j"] ) 21 | , ( "c" , [ "c" ] ) 22 | , ( "d" , [] ) 23 | , ( "e" , [ "d" ] ) 24 | , ( "f" , [ "g" ] ) 25 | , ( "g" , [ "h" , "i" ] ) 26 | , ( "h" , [ "f" , "j" ] ) 27 | , ( "i" , [] ) 28 | , ( "j" , [ "k" , "l" ] ) 29 | , ( "l" , [ "l" ] ) 30 | , ( "k" , [] ) 31 | ] 32 | 33 | pattern Pair x y = (x,y) 34 | ex2 = Map.fromList [Pair "foldl" [],Pair "main" ["foldl"],Pair "map" []] 35 | 36 | -------------------------------------------------------------------------------- 37 | 38 | type Vtx = String 39 | type Graph = Map Vtx [Vtx] 40 | 41 | graphToList :: Graph -> [(Vtx,Vtx)] 42 | graphToList g = [ (v,w) | (v,ws) <- Map.toList g , w <- ws ] 43 | 44 | graphFromList :: [(Vtx,Vtx)] -> Graph 45 | graphFromList = buildMap (:[]) (:) 46 | 47 | flipGraph :: Graph -> Graph 48 | flipGraph graph = insertKeys $ graphFromList $ map swap $ graphToList graph where 49 | swap (x,y) = (y,x) 50 | keys = Map.keys graph 51 | insertKeys g = foldl (\old k -> Map.alter h k old) g keys 52 | h Nothing = Just [] 53 | h (Just ys) = Just ys 54 | 55 | buildMap :: Ord k => (a -> b) -> (a -> b -> b) -> [(k,a)] -> Map k b 56 | buildMap f g xys = foldl' insert Map.empty xys where 57 | insert old (k,x) = Map.alter (h x) k old 58 | -- h :: a -> Maybe b -> Maybe b 59 | h x Nothing = Just (f x ) 60 | h x (Just y) = Just (g x y) 61 | 62 | randomGraph :: Int -> Double -> IO Graph 63 | randomGraph n threshold = 64 | do 65 | zs <- replicateM n2 $ randomRIO (0,1) 66 | return $ graphFromList [ (f i, f j) | (z,(i,j)) <- zip zs edges , z < threshold ] 67 | where 68 | n2 = n*n 69 | edges = [ (i,j) | i<-[1..n], j<-[1..n] ] 70 | f i = "v" ++ show i 71 | 72 | test = do 73 | forM_ [0.05,0.10..0.95] $ \threshold -> do 74 | putStrLn $ "threshold = " ++ show threshold 75 | graphs <- replicateM 25 (randomGraph 15 threshold) 76 | print $ map checkSCC graphs 77 | 78 | -------------------------------------------------------------------------------- 79 | 80 | checkSCC :: Graph -> Bool 81 | checkSCC graph = case checkSCC' graph of 82 | (b1,b2s,b3s) -> {- b1 && -} (and b2s) && (and b3s) 83 | -- the vertex set is not really known 84 | 85 | checkSCC' graph = ( checkSet , checkOrder , checkSC ) where 86 | 87 | scc = reverse $ tarjanSCC (flipGraph graph) 88 | 89 | lkpNeighs v = Map.findWithDefault [] v graph 90 | 91 | -- does the result covers the vertex set? 92 | checkSet = sort (concat scc) == Map.keys graph 93 | 94 | -- is it ordered (topologically sorted)? 95 | checkOrder = worker scc where 96 | worker [] = [] 97 | worker [this] = [] 98 | worker (this:rest) = thisOK : worker rest where 99 | thisOK = and [ w `elem` these | v <- this , w <- lkpNeighs v ] where 100 | these = this ++ concat rest 101 | 102 | -- are the components really SCC-s (can you reach each vertex from each other vertex?) 103 | checkSC = (map isSCC scc) 104 | 105 | isSCC vtxset = 106 | and [ w `elem` reachv | v <- vtxset , let reachv = Set.toList (reachableSet graph v) , w <- vtxset ] 107 | 108 | reachableSet :: Graph -> Vtx -> Set Vtx 109 | reachableSet graph vtx0 = execState (go vtx0) (Set.empty) where 110 | go :: Vtx -> State (Set Vtx) () 111 | go v = do 112 | visited <- get 113 | unless (Set.member v visited) $ do 114 | put (Set.insert v visited) 115 | let neighs = Map.findWithDefault [] v graph :: [Vtx] 116 | mapM_ go neighs 117 | 118 | -------------------------------------------------------------------------------- 119 | -- * Tarjan's topologically sorted SCC algorithm 120 | 121 | data Tarjan = Tarjan 122 | { _next :: Int 123 | , _stack :: [Vtx] 124 | , _links :: Map Vtx (Int,Int) -- ^ index and low-linkg 125 | , _output :: [[Vtx]] 126 | } 127 | deriving Show 128 | 129 | -- | Based on 130 | tarjanSCC :: Graph -> [[Vtx]] 131 | tarjanSCC graph = _output (execState (mapM_ worker vtxSet) iniState) where 132 | 133 | iniState = Tarjan 0 [] Map.empty [] 134 | 135 | vtxSet = Map.keys graph 136 | 137 | worker v = do 138 | get >>= \state -> case Map.lookup v (_links state) of 139 | Nothing -> scc v 140 | Just _ -> return () 141 | 142 | scc v = do 143 | -- init 144 | Tarjan next stack links output <- get 145 | let stack' = v : stack 146 | links' = Map.insert v (next,next) links 147 | next' = next + 1 148 | put (Tarjan next' stack' links' output) 149 | 150 | -- successor vertices 151 | forM_ (Map.findWithDefault [] v graph) $ \w -> do 152 | state <- get 153 | case Map.lookup w (_links state) of 154 | 155 | Nothing -> do 156 | scc w 157 | state <- get 158 | let links = _links state 159 | case Map.lookup v links of { Just (vi,vl) -> 160 | case Map.lookup w links of { Just (_ ,wl) -> 161 | put $ state { _links = Map.insert v (vi, min vl wl) links } } } 162 | 163 | Just (wi,wl) -> when (w `elem` (_stack state)) $ do 164 | let links = _links state 165 | case Map.lookup v links of { Just (vi,vl) -> 166 | case Map.lookup w links of { Just (wi,_ ) -> 167 | put $ state { _links = Map.insert v (vi, min vl wi) links } } } 168 | 169 | -- pop and generate 170 | get >>= \state -> case Map.lookup v (_links state) of 171 | Just (vi,vl) -> when (vi == vl) $ do 172 | Tarjan next stack links output <- get 173 | let (this , _v:stack') = span (/=v) stack 174 | unless (_v == v) $ error "fatal" 175 | let output' = (v:this) : output 176 | put (Tarjan next stack' links output') 177 | 178 | -------------------------------------------------------------------------------- 179 | 180 | --------------------------------------------------------------------------------