├── .ghci
├── .gitignore
├── Base.hs
├── Closure.hs
├── CodeGen.hs
├── Containers.hs
├── Core.hs
├── DataCon.hs
├── Dependency.hs
├── Eval.hs
├── Inliner.hs
├── LICENSE
├── Nano.hs
├── Parser.hs
├── PrimGHC.hs
├── PrimOps.hs
├── README.md
├── ScopeCheck.hs
├── Syntax.hs
├── Types.hs
├── _main_old.hs
├── asm_shit
    ├── rts.asm
    └── sys_macos.asm
├── bootstrap.sh
├── examples
    ├── church.nano
    ├── test.nano
    └── test5.nano
├── main.hs
├── nanohs.cabal
├── rts.c
├── slides
    ├── .gitignore
    ├── nanohs_slides.pdf
    └── nanohs_slides.tex
├── sloc_count.sh
└── topsort
    └── TopSortSCC.hs


/.ghci:
--------------------------------------------------------------------------------
1 | :set -XMagicHash
2 | :set -XStrict
3 | :set -XFlexibleInstances
4 | :set -XFlexibleContexts
5 | :set -XTypeSynonymInstances
6 | :set -XOverloadedLists
7 | :set -XOverloadedStrings


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | dist
 2 | bck/
 3 | *.o
 4 | *.hi
 5 | *.ll
 6 | a.out
 7 | tmp.c
 8 | tmp1.c
 9 | tmp2.c
10 | tmp.asm
11 | *.exe
12 | nanohs_stage1.c
13 | nanohs_stage2.c
14 | nanohs_stage3.c
15 | a.txt
16 | test1.nano
17 | test2.nano
18 | test3.nano


--------------------------------------------------------------------------------
/Base.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Base library
  3 | 
  4 | {-# LANGUAGE CPP, NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module Base where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | -- debugging hacks
 18 | 
 19 | #ifndef __GLASGOW_HASKELL__
 20 | debug_ str x   = debug str x x
 21 | debug  str x y = case action x ActionToken of { Unit -> y } 
 22 |   where { action x = ioseq (putStr ">>> ") (ioseq (putStr str) (print x)) }
 23 | #endif
 24 | 
 25 | --------------------------------------------------------------------------------
 26 | -- * Prelude
 27 | -- ** functions
 28 | 
 29 | -- data Unit = Unit deriving Show
 30 | 
 31 | id :: a -> a
 32 | id = \x -> x
 33 | 
 34 | const :: a -> b -> a
 35 | const x = \_ -> x
 36 | 
 37 | app :: (a -> b) -> a -> b
 38 | app f x = f x
 39 | 
 40 | compose :: (b -> c) -> (a -> b) -> (a -> c)
 41 | compose f g = \x -> f (g x)
 42 | 
 43 | compose3 :: (c -> d) -> (b -> c) -> (a -> b) -> (a -> d)
 44 | compose3 f g h = \x -> f (g (h x))
 45 | 
 46 | flip :: (a -> b -> c) -> (b -> a -> c)
 47 | flip f = \x y -> f y x
 48 | 
 49 | fix :: ((a -> b) -> (a -> b)) -> a -> b
 50 | fix u x = u (fix u) x
 51 | 
 52 | -- test_fix :: Int -> List Int  
 53 | -- test_fix x = fix (\rec n -> ifte (eq n 0) Nil (Cons n (rec (dec n)))) x
 54 | 
 55 | --------------------------------------------------------------------------------
 56 | -- ** numbers
 57 | 
 58 | inc :: Int -> Int
 59 | inc n = plus n 1
 60 | 
 61 | inc2 :: Int -> Int
 62 | inc2 n = plus n 2
 63 | 
 64 | dec :: Int -> Int
 65 | dec n = minus n 1
 66 | 
 67 | natRec :: a -> (a -> a) -> Int -> a
 68 | natRec z s = go where { go n = ifte (eq n 0) z (s (go (dec n))) }
 69 | 
 70 | ceq :: Char -> Char -> Bool
 71 | ceq c d = eq (ord c) (ord d)
 72 | 
 73 | cneq :: Char -> Char -> Bool
 74 | cneq c d = not (ceq c d)
 75 | 
 76 | gneq :: Eq a => a -> a -> Bool
 77 | gneq x y = not (geq x y)
 78 | 
 79 | neq :: Int -> Int -> Bool
 80 | neq x y = not (eq x y)
 81 | 
 82 | gt :: Int -> Int -> Bool
 83 | gt x y = lt y x
 84 | 
 85 | ge :: Int -> Int -> Bool
 86 | ge x y = le y x
 87 | 
 88 | min :: Int -> Int -> Int
 89 | min x y = ifte (le x y) x y
 90 | 
 91 | max :: Int -> Int -> Int
 92 | max x y = ifte (le x y) y x
 93 | 
 94 | data Ordering = LT | EQ | GT deriving Show
 95 | 
 96 | compare :: Int -> Int -> Ordering
 97 | compare x y = ifte (lt x y) LT (ifte (eq x y) EQ GT)
 98 | 
 99 | -- | the list [0,1,...n-1]
100 | range :: Int -> List Int
101 | range n = rangeFrom 0 n
102 | 
103 | -- | the list [k,k+1,...k+n-1]
104 | rangeFrom :: Int -> Int -> List Int
105 | rangeFrom k n = ifte (gt n 0) (Cons k (rangeFrom (inc k) (dec n))) Nil
106 | 
107 | -- -- | the list [-n+1,-n+2,...0] == reverse (map negate (range n))
108 | -- negativeDeBruijnRange :: Int -> List Int
109 | -- negativeDeBruijnRange n = rangeFrom (inc (negate n)) n
110 | 
111 | --------------------------------------------------------------------------------
112 | -- ** Bool
113 | 
114 | -- data Bool = True | False deriving Show
115 | 
116 | -- These are primops for now so they can short-circuit
117 | --
118 | -- and :: Bool -> Bool -> Bool
119 | -- and x ~y = case x of { True -> y ; False -> False }
120 | --
121 | -- or :: Bool -> Bool -> Bool
122 | -- or x ~y = case x of { True -> True ; False -> y }
123 | 
124 | -- This is a primop only for efficiency reasons
125 | --
126 | -- not :: Bool -> Bool
127 | -- not b = case b of { True -> False ; False -> True }
128 | 
129 | and3 :: Bool -> Bool -> Bool -> Bool
130 | and3 x y z = and (and x y) z
131 | 
132 | andList :: List Bool -> Bool
133 | andList list = case list of { Nil -> True ; Cons b bs -> ifte b (andList bs) False }
134 | 
135 | --------------------------------------------------------------------------------
136 | -- ** Maybe
137 | 
138 | -- data Maybe a = Nothing | Just a deriving Show
139 | 
140 | isJust :: Maybe a -> Bool
141 | isJust mb = case mb of { Nothing -> False ; Just _ -> True }
142 | 
143 | isNothing :: Maybe a -> Bool
144 | isNothing mb = case mb of { Nothing -> True ; Just _ -> False }
145 | 
146 | fromJust :: Maybe a -> a
147 | fromJust mb = case mb of { Just x -> x ; Nothing -> error "fromJust" }
148 | 
149 | mbfmap :: (a -> b) -> Maybe a -> Maybe b
150 | mbfmap f mb = case mb of { Just x -> Just (f x) ; Nothing -> Nothing }
151 | 
152 | catMaybes :: List (Maybe a) -> List a
153 | catMaybes mbs = go mbs where { go list = case list of { Nil -> Nil ; Cons mb rest ->
154 |   case mb of { Nothing -> go rest ; Just x -> Cons x (go rest) } } }
155 | 
156 | --------------------------------------------------------------------------------
157 | -- ** Either
158 | 
159 | data Either a b = Left a | Right b deriving Show
160 | 
161 | isLeft :: Either a b -> Bool
162 | isLeft ei = case ei of { Left _ -> True ; Right _ -> False }
163 | 
164 | isRight :: Either a b -> Bool
165 | isRight ei = case ei of { Left _ -> False ; Right _ -> True }
166 | 
167 | fromLeft :: Either a b -> a
168 | fromLeft ei = case ei of { Left x -> x ; Right _ -> error "fromLeft" }
169 | 
170 | fromRight :: Either a b -> b
171 | fromRight ei = case ei of { Right y -> y ; Left _ -> error "fromRight" }
172 | 
173 | --------------------------------------------------------------------------------
174 | -- ** Pair, Triple
175 | 
176 | -- data Pair a b = Pair a b deriving Show
177 | 
178 | fst :: Pair a b -> a
179 | fst pair = case pair of { Pair x _ -> x }
180 | 
181 | snd :: Pair a b -> b
182 | snd pair = case pair of { Pair _ y -> y }
183 | 
184 | swap :: Pair a b -> Pair b a
185 | swap pair = case pair of { Pair x y -> Pair y x }
186 | 
187 | first :: (a1 -> a2) -> Pair a1 b -> Pair a2 b
188 | first f pair = case pair of { Pair x y -> Pair (f x) y }
189 | 
190 | second :: (b1 -> b2) -> Pair a b1 -> Pair a b2
191 | second g pair = case pair of { Pair x y -> Pair x (g y) }
192 | 
193 | data Triple a b c = Triple a b c deriving Show
194 | 
195 | fst3 :: Triple a b c -> a
196 | fst3 triple = case triple of { Triple x _ _ -> x }
197 | 
198 | snd3 :: Triple a b c -> b
199 | snd3 triple = case triple of { Triple _ y _ -> y }
200 | 
201 | thd3 :: Triple a b c -> c
202 | thd3 triple = case triple of { Triple _ _ z -> z }
203 | 
204 | --------------------------------------------------------------------------------
205 | -- ** Lists
206 | 
207 | -- data List a = Nil | Cons a (List a) deriving (Eq)
208 | 
209 | singleton :: a -> List a
210 | singleton x = Cons x Nil
211 | 
212 | head :: List a -> a
213 | head ls = case ls of { Cons x _ -> x ; Nil -> error "head: empty list" }
214 | 
215 | tail :: List a -> List a
216 | tail ls = case ls of { Nil -> Nil ; Cons _ xs -> xs }
217 | 
218 | isNil :: List a -> Bool
219 | isNil ls = case ls of { Nil -> True ; Cons _ _ -> False }
220 | 
221 | mbSingleton :: List a -> Maybe a
222 | mbSingleton list = case list of { Cons x xs -> case xs of { Nil -> Just x ; _ -> Nothing } ; Nil -> Nothing }
223 | 
224 | mbPair :: List a -> Maybe (Pair a a)
225 | mbPair list = case list of { Cons x xs -> case xs of { Cons y ys -> case ys of 
226 |   { Nil -> Just (Pair x y) ; _ -> Nothing } ; Nil -> Nothing } ; Nil -> Nothing }
227 | 
228 | length :: List a -> Int
229 | length ls = case ls of { Nil -> 0 ; Cons _ xs -> inc (length xs) }
230 | 
231 | index :: Int -> List a -> a
232 | index k ls = case ls of
233 |   { Nil       -> error "index: out of bounds"
234 |   ; Cons x xs -> ifte (eq k 0) x (index (dec k) xs) }
235 | 
236 | elem :: Eq a => a -> List a -> Bool
237 | elem x = go where { go ls = case ls of { Nil -> False ; Cons y ys -> ifte (geq x y) True (go ys) } }
238 | 
239 | intElem :: Int -> List Int -> Bool
240 | intElem x = go where { go ls = case ls of { Nil -> False ; Cons y ys -> ifte (eq x y) True (go ys) } }
241 | 
242 | charElem :: Char -> List Char -> Bool
243 | charElem x = go where { go ls = case ls of { Nil -> False ; Cons y ys -> ifte (ceq x y) True (go ys) } }
244 | 
245 | stringElem :: String -> List String -> Bool
246 | stringElem x = go where { go ls = case ls of { Nil -> False ; Cons y ys -> ifte (stringEq x y) True (go ys) } }
247 | 
248 | findIndex :: (a -> Bool) -> List a -> Maybe Int
249 | findIndex cond list = go 0 list where { go j ls = case ls of { Nil -> Nothing 
250 |   ; Cons x xs -> case cond x of { True -> Just j ; _ -> go (inc j) xs }}}
251 | 
252 | unsafeFindIndex :: (a -> Bool) -> List a -> Int
253 | unsafeFindIndex cond list = case findIndex cond list of { Just k -> k ; Nothing -> error "unsafeFindIndex: not found" }
254 | 
255 | foldl :: (a -> b -> a) -> (a -> List b -> a)
256 | foldl f x0 list = go x0 list where
257 |   { go x ls = case ls of { Nil -> x ; Cons y ys -> go (f x y) ys }
258 |   }
259 | 
260 | flipFoldr :: (b -> a -> a) -> (List b -> a -> a)
261 | flipFoldr f list y0 = go list y0 where
262 |   { go ls y = case ls of { Nil -> y ; Cons x xs -> f x (go xs y) }
263 |   }
264 | 
265 | foldr :: (b -> a -> a) -> (a -> List b -> a)
266 | foldr f x list = flipFoldr f list x
267 | 
268 | scanl :: (a -> b -> a) -> (a -> List b -> List a)
269 | scanl f x0 list = go x0 list where
270 |   { go x ls = case ls of { Nil -> Cons x Nil ; Cons y ys -> Cons x (go (f x y) ys) }
271 |   }
272 | 
273 | -- | @scanl_ f a xs = init (scanl f a xs)@
274 | scanl_ :: (a -> b -> a) -> (a -> List b -> List a)
275 | scanl_ f x0 list = go x0 list where
276 |   { go x ls = case ls of { Nil -> Nil ; Cons y ys -> Cons x (go (f x y) ys) }
277 |   }
278 | 
279 | sum :: List Int -> Int
280 | sum ns = foldl plus 0 ns
281 | 
282 | reverse :: List a -> List a
283 | reverse list = foldl (\xs x -> Cons x xs) Nil list
284 | 
285 | snoc :: List a -> a -> List a
286 | snoc xs y = case xs of { Nil -> singleton y ; Cons z zs -> Cons z (snoc zs y) }
287 | 
288 | append :: List a -> List a -> List a
289 | append xs ys = case xs of { Nil -> ys ; Cons z zs -> Cons z (append zs ys) }
290 | 
291 | append3 :: List a -> List a -> List a -> List a
292 | append3 xs ys zs = append xs (append ys zs)
293 | 
294 | concat :: List (List a) -> List a
295 | concat lls = flipFoldr append lls Nil
296 | 
297 | -- | > reverseAppend xs ys = append (reverse xs) ys
298 | reverseAppend :: List a -> List a -> List a
299 | reverseAppend xs ys = case xs of { Nil -> ys ; Cons z zs -> reverseAppend zs (Cons z ys) }
300 | 
301 | map :: (a -> b) -> List a -> List b
302 | map f = go where { go xs = case xs of { Nil -> Nil ; Cons x xs -> Cons (f x) (map f xs) } }
303 | 
304 | for :: List a -> (a -> b) -> List b
305 | for xs f = map f xs
306 | 
307 | filter :: (a -> Bool) -> List a -> List a
308 | filter f = go where
309 |   { go list = case list of
310 |      { Nil -> Nil ; Cons x xs -> case f x of
311 |        { False -> go xs ; True -> Cons x (go xs) } } }
312 | 
313 | replicate :: Int -> a -> List a
314 | replicate n x = go n where { go m = ifte (eq m 0) Nil (Cons x (go (dec m))) }
315 | 
316 | take :: Int -> List a -> List a
317 | take n ls = case ls of { Nil -> Nil ; Cons x xs -> ifte (eq n 0) Nil (Cons x (take (dec n) xs)) }
318 | 
319 | drop :: Int -> List a -> List a
320 | drop n ls = case ls of { Nil -> Nil ; Cons x xs -> ifte (eq n 0) ls (drop (dec n) xs) }
321 | 
322 | takeWhile :: (a -> Bool) -> List a -> List a
323 | takeWhile cond list = go list where
324 |   { go ls = case ls of { Nil -> Nil ; Cons x xs -> case cond x of
325 |     { True -> Cons x (go xs) ; False -> Nil } } }
326 | 
327 | dropWhile :: (a -> Bool) -> List a -> List a
328 | dropWhile cond list = go list where
329 |   { go ls = case ls of { Nil -> Nil ; Cons x xs -> case cond x of
330 |     { True -> go xs ; False -> ls } } }
331 | 
332 | span :: (a -> Bool) -> List a -> Pair (List a) (List a)
333 | span cond xs = Pair (takeWhile cond xs) (dropWhile cond xs)
334 | 
335 | zipWith :: (a -> b -> c) -> List a -> List b -> List c
336 | zipWith f as bs = go as bs where { go ls1 ls2 = case ls1 of { Nil -> Nil ; Cons x xs -> case ls2 of
337 |   { Nil -> Nil ; Cons y ys -> Cons (f x y) (go xs ys) } } }
338 | 
339 | zip :: List a -> List b -> List (Pair a b)
340 | zip xs ys = zipWith Pair xs ys
341 | 
342 | unzip :: List (Pair a b) -> Pair (List a) (List b)
343 | unzip xys = case xys of { Nil -> Pair Nil Nil ; Cons this rest -> case this of
344 |   { Pair x y -> case unzip rest of { Pair xs ys -> Pair (Cons x xs) (Cons y ys) } } }
345 | 
346 | -- | Zip with @[0..n-1]@
347 | zipIndex :: List a -> List (Pair Int a)
348 | zipIndex xs = zip (range (length xs)) xs
349 | 
350 | -- | Zip with @[0..n-1]@
351 | zipWithIndex :: (a -> Int -> b) -> List a -> List b
352 | zipWithIndex f xs = zipWith f xs (range (length xs))
353 | 
354 | -- | Zip with @replicate n y@
355 | zipConst :: b -> List a -> List (Pair b a)
356 | zipConst y list = worker list where { worker l = case l of { Nil -> Nil ; Cons x xs -> Cons (Pair y x) (worker xs) }}
357 | 
358 | -- | Zip with @(first : repeat rest)@
359 | zipFirstRest :: b -> b -> List a -> List (Pair b a)
360 | zipFirstRest first rest xs = case xs of { Nil -> Nil ; Cons x xs -> Cons (Pair first x) (zipConst rest xs) }
361 | 
362 | --------------------------------------------------------------------------------
363 | -- ** Characters
364 | 
365 | singleQuoteC    = chr 39
366 | doubleQuoteC    = chr 34
367 | backslashC      = chr 92
368 | newlineC        = chr 10
369 | carriageReturnC = chr 13
370 | 
371 | isDigit :: Char -> Bool
372 | isDigit ch = and (ge k 48) (le k 57) where { k = ord ch }
373 | 
374 | mbDigit :: Char -> Maybe Int
375 | mbDigit ch = ifte (isDigit ch) (Just (minus (ord ch) 48)) Nothing
376 | 
377 | isUpper :: Char -> Bool
378 | isUpper ch = and (ge k 65) (le k 90) where { k = ord ch }
379 | 
380 | isLower :: Char -> Bool
381 | isLower ch = and (ge k 97) (le k 122) where { k = ord ch }
382 | 
383 | isAlpha    ch = or (isUpper ch) (isLower ch)
384 | isAlphaNum ch = or (isAlpha ch) (isDigit ch)
385 | isLower_   ch = or (ceq ch '_') (isLower ch)
386 | 
387 | --------------------------------------------------------------------------------
388 | -- ** Strings
389 | 
390 | -- type String = List Char
391 | 
392 | stringEq :: String -> String -> Bool
393 | stringEq a b = go a b where { go str1 str2 = case str1 of 
394 |   { Nil       -> case str2 of { Nil -> True  ; _ -> False }
395 |   ; Cons x xs -> case str2 of { Nil -> False ; Cons y ys -> and (ceq x y) (go xs ys) }}}
396 | 
397 | stringMember :: String -> List String -> Bool
398 | stringMember what ls = case ls of { Nil -> False ; Cons this rest -> case stringEq what this of
399 |   { True -> True ; False -> stringMember what rest }} 
400 | 
401 | charToString :: Char -> String
402 | charToString x = Cons x Nil
403 | 
404 | showBool :: Bool -> String
405 | showBool b = case b of { True -> "True" ; False -> "False" }
406 | 
407 | showChar :: Char -> String
408 | showChar c = Cons singleQuoteC (Cons c (Cons singleQuoteC Nil))
409 | 
410 | showString :: String -> String
411 | showString s = doubleQuoteString s
412 | 
413 | showNat :: Int -> String
414 | showNat n = ifte (lt n 0) (error "showNat: negative") (worker n) where
415 |   { worker n = ifte (eq n 0) "0" (reverse (go n))
416 |   ; go     n = ifte (eq n 0) Nil (Cons (chr (plus (mod n 10) 48)) (go (div n 10)))
417 |   }
418 | 
419 | showInt :: Int -> String
420 | showInt n = ifte (ge n 0) (showNat n) (Cons '-' (showNat (negate n)))
421 | 
422 | appendInt :: String -> Int -> String
423 | appendInt s n = append s (showInt n)
424 | 
425 | readNat :: String -> Maybe Int
426 | readNat str = case str of { Nil -> Nothing ; Cons x xs -> go (reverse str) } where
427 |   { go ds = case ds of { Nil -> Just 0 ; Cons x xs -> case mbDigit x of
428 |     { Just k -> case go xs of { Just n -> Just (plus k (times n 10)) ; Nothing -> Nothing }
429 |     ; Nothing -> Nothing } } }
430 | 
431 | readInt :: String -> Maybe Int
432 | readInt str = case str of
433 |   { Nil -> Nothing
434 |   ; Cons x xs -> ifte (ceq x '-') (mbfmap negate (readNat xs)) (readNat str) }
435 | 
436 | quoteChar :: Char -> String
437 | quoteChar x = Cons singleQuoteC (Cons x (Cons singleQuoteC Nil))
438 | 
439 | delimString :: Char -> Char -> String -> String
440 | delimString l r xs = Cons l (append xs (Cons r Nil))
441 | 
442 | backslashEn :: String
443 | backslashEn = [ backslashC  , 'n' ]
444 | 
445 | backslashDoubleQuote :: String
446 | backslashDoubleQuote = [ backslashC  , '"' ]
447 | 
448 | doubleQuoteString :: String -> String
449 | doubleQuoteString str = delimString doubleQuoteC doubleQuoteC str
450 | 
451 | escapedDoubleQuoteString :: String -> String
452 | escapedDoubleQuoteString str = append3 backslashDoubleQuote str backslashDoubleQuote
453 | 
454 | doubleQuoteStringLn :: String -> String
455 | doubleQuoteStringLn str = delimString doubleQuoteC doubleQuoteC (append str backslashEn)
456 | 
457 | quoteString :: String -> String
458 | quoteString what= delimString '`' '`' what
459 | 
460 | parenString :: String -> String
461 | parenString what = delimString '(' ')' what
462 | 
463 | intercalate :: List a -> List (List a) -> List a
464 | intercalate sep llist = go llist where
465 |   { go xss = case xss of
466 |     { Nil -> Nil ; Cons ys yss -> case yss of
467 |       { Nil -> ys
468 |       ; _   -> append ys (append sep (go yss)) } } }
469 | 
470 | unwords :: List String -> String
471 | unwords list = intercalate (Cons ' ' Nil) list
472 | 
473 | unlines :: List String -> String
474 | unlines list = intercalate (Cons newlineC Nil) list
475 | 
476 | lines :: String -> List String
477 | lines xs = case xs of { Nil -> Nil ; Cons _ _ -> case span (\x -> cneq x newlineC) xs of
478 |   { Pair this rest -> case rest of { Nil -> Cons this Nil ; Cons _ ys -> Cons this (lines ys) } } }
479 | 
480 | --------------------------------------------------------------------------------
481 | -- ** IO Monad
482 | 
483 | -- data ActionToken = ActionToken 
484 | -- type IO a = ActionToken -> a
485 | 
486 | ioret_ :: IO Unit
487 | ioret_ = ioreturn Unit
488 | 
489 | iofmap :: (a -> b) -> IO a -> IO b
490 | iofmap f action = iobind action (\x -> ioreturn (f x))
491 | 
492 | iojoin :: IO (IO a) -> IO a
493 | iojoin producer = iobind producer id
494 | 
495 | iocompose :: (b -> IO c) -> (a -> IO b) -> a -> IO c
496 | iocompose g f = \x -> iobind (f x) g
497 | 
498 | ioliftA2 :: (a -> b -> c) -> IO a -> IO b -> IO c
499 | ioliftA2 f act1 act2 = iobind act1 (\x -> iobind act2 (\y -> ioreturn (f x y)))
500 | 
501 | ioseq :: IO a -> IO b -> IO b
502 | ioseq f g = iobind f (\_ -> g)
503 | 
504 | iosequence_ :: List (IO a) -> IO Unit
505 | iosequence_ list = case list of { Nil -> ioret_ ; Cons a as -> ioseq a (iosequence_ as) }
506 | 
507 | iomapM :: (a -> IO b) -> List a -> IO (List b)
508 | iomapM f list = case list of { Nil -> ioreturn Nil ; Cons x xs -> ioliftA2 Cons (f x) (iomapM f xs) }
509 | 
510 | ioforM :: List a -> (a -> IO b) -> IO (List b)
511 | ioforM list f = iomapM f list
512 | 
513 | iomapM_ :: (a -> IO b) -> List a -> IO Unit
514 | iomapM_ f list = case list of { Nil -> ioreturn Unit ; Cons x xs -> ioseq (f x) (iomapM_ f xs) }
515 | 
516 | ioforM_ :: List a -> (a -> IO b) -> IO Unit
517 | ioforM_ list f = iomapM_ f list
518 | 
519 | getArgs :: IO (List String)
520 | getArgs = go 0 where { go k = iobind (getArg k) (\mb -> case mb of 
521 |   { Nothing   -> ioreturn Nil 
522 |   ; Just this -> iobind (go (inc k)) (\rest -> ioreturn (Cons this rest)) })}
523 | 
524 | putStr :: String -> IO Unit
525 | putStr s = hPutStr stdout s
526 | 
527 | putStrLn :: String -> IO Unit
528 | putStrLn str = ioseq (putStr str) (putChar (chr 10)) 
529 | 
530 | type FilePath = String
531 | 
532 | hGetContents :: Handle -> IO String
533 | hGetContents h = go h where { go h = iobind (hGetChar h) (\mb -> case mb of 
534 |   { Nothing -> ioreturn Nil 
535 |   ; Just y  -> iobind (go h) (\ys -> ioreturn (Cons y ys)) }) }
536 | 
537 | hPutStrLn :: Handle -> String -> IO Unit
538 | hPutStrLn h str = ioseq (hPutStr h str) (hPutChar h (chr 10)) 
539 | 
540 | withFile :: FilePath -> IOMode -> (Handle -> IO a) -> IO a
541 | withFile fn mode action =
542 |   iobind (openFile fn mode) (\handle -> 
543 |   iobind (action handle)    (\result -> 
544 |   iobind (hClose handle)    (\_      -> ioreturn result)))
545 | 
546 | readFile :: FilePath -> IO String
547 | readFile fn = withFile fn ReadMode hGetContents
548 | 
549 | writeFile :: FilePath -> String -> IO Unit
550 | writeFile fn text = withFile fn WriteMode (\h -> hPutStr h text)
551 | 
552 | writeLines :: FilePath -> List String -> IO Unit
553 | writeLines fn ls = withFile fn WriteMode (\h -> iomapM_ (hPutStrLn h) ls)
554 | 
555 | --------------------------------------------------------------------------------
556 | -- ** State monad
557 | 
558 | type State s a = s -> Pair s a
559 | 
560 | runState  :: State s a -> s -> Pair s a
561 | evalState :: State s a -> s -> a
562 | execState :: State s a -> s -> s
563 | runState  f = f
564 | evalState f = compose snd f
565 | execState f = compose fst f
566 | 
567 | sreturn :: a -> State s a
568 | sreturn x = \s -> Pair s x
569 | 
570 | sfmap :: (a -> b) -> State s a -> State s b
571 | sfmap f action = \s -> case action s of { Pair s' x -> Pair s' (f x) }
572 | 
573 | sliftA2 :: (a -> b -> c) -> State s a -> State s b -> State s c
574 | sliftA2 f action1 action2 = \s -> case action1 s of { Pair s' x ->
575 |   case action2 s' of { Pair s'' y -> Pair s'' (f x y) } }
576 | 
577 | sbind :: State s a -> (a -> State s b) -> State s b
578 | sbind f u = \s -> case f s of { Pair s' x -> u x s' }
579 | 
580 | sseq :: State s a -> State s b -> State s b
581 | sseq p q = sbind p (\_ -> q)
582 | 
583 | sseq3 :: State s a -> State s b -> State s c -> State s c
584 | sseq3 p q r = sseq p (sseq q r)
585 | 
586 | sseq4 :: State s a -> State s b -> State s c -> State s d -> State s d
587 | sseq4 p q r s = sseq p (sseq q (sseq r s))
588 | 
589 | sget :: State s s
590 | sget = \s -> Pair s s
591 | 
592 | sput :: s -> State s Unit
593 | sput s = \_ -> Pair s Unit
594 | 
595 | smodify :: (s -> s) -> State s Unit
596 | smodify f = \old -> Pair (f old) Unit
597 | 
598 | swhen :: Bool -> State s Unit -> State s Unit
599 | swhen b action = ifte b action (sreturn Unit)
600 | 
601 | ssequence_ :: List (State s a) -> State s Unit
602 | ssequence_ actions = case actions of { Nil -> sreturn Unit ; Cons u us -> sseq u (ssequence_ us) }
603 | 
604 | smapM :: (a -> State s b) -> List a -> State s (List b)
605 | smapM f list = case list of { Nil -> sreturn Nil ; Cons x xs -> sliftA2 Cons (f x) (smapM f xs) }
606 | 
607 | sforM :: List a -> (a -> State s b) -> State s (List b)
608 | sforM list f = smapM f list
609 | 
610 | smapM_ :: (a -> State s b) -> List a -> State s Unit
611 | smapM_ f list = case list of { Nil -> sreturn Unit ; Cons x xs -> sseq (f x) (smapM_ f xs) }
612 | 
613 | sforM_ :: List a -> (a -> State s b) -> State s Unit
614 | sforM_ list f = smapM_ f list
615 | 
616 | --------------------------------------------------------------------------------
617 | 


--------------------------------------------------------------------------------
/Closure.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Closure conversion
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module Closure where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | 
 18 | import Base
 19 | import Containers
 20 | import Types
 21 | import PrimOps
 22 | -- import DataCon
 23 | -- import Syntax
 24 | import Core
 25 | import ScopeCheck
 26 | 
 27 | {-% include "Base.hs"       %-}
 28 | {-% include "Containers.hs" %-}
 29 | {-% include "Types.hs"      %-}
 30 | {-% include "PrimOps.hs"    %-}
 31 | -- {-% include "DataCon.hs"    %-}
 32 | -- {-% include "Syntax.hs"     %-}
 33 | {-% include "Core.hs"       %-}
 34 | {-% include "ScopeCheck.hs" %-}
 35 | 
 36 | --------------------------------------------------------------------------------
 37 | -- ** Types
 38 | 
 39 | -- pattern VarF nvar = AtmF (VarA nvar)
 40 | -- pattern ConF ncon = AtmF (ConA ncon)
 41 | -- pattern KstF lit  = AtmF (KstA lit )
 42 | 
 43 | -- | We lift all lambdas (and lets, and branch right hand sides) to top-level.
 44 | -- Note: In @LetF@, the Bool is there to note whether the bound expression needs to
 45 | -- be evaluated immediately.
 46 | data Lifted
 47 |   = AtmF Atom
 48 |   | AppF Lifted Atom
 49 |   | PriF PrimOp (List Atom  )
 50 |   | LZPF PrimOp (List Lifted)
 51 |   | CasF LAtom (List BranchF)
 52 |   | LamF ClosureF
 53 |   | RecF Int (List (Named ClosureF)) Lifted
 54 |   | LetF Bool ClosureF Lifted
 55 |   deriving Show
 56 | 
 57 | data BranchF
 58 |   = BranchF (Named Con) Int ClosureF
 59 |   | DefaultF ClosureF
 60 |   deriving Show
 61 | 
 62 | isDefaultF :: BranchF -> Bool
 63 | isDefaultF branch = case branch of { DefaultF _ -> True ; _ -> False }
 64 | 
 65 | hasDefaultF :: List BranchF -> Bool
 66 | hasDefaultF ls = case ls of { Nil -> False ; Cons x xs -> case isDefaultF x of { True -> True ; _ -> hasDefaultF xs }}
 67 | 
 68 | -- | When allocating a closure, we create a new local environment
 69 | -- by pruning the current environment. We also remember the number
 70 | -- of remaining arguments (0 = thunk)
 71 | data ClosureF = ClosureF ClosureBody (List Level) Arity deriving Show
 72 | 
 73 | data ClosureBody
 74 |   = StaticBody Static
 75 |   | InlineBody Lifted
 76 |   deriving Show
 77 | 
 78 | closureIndex :: ClosureF -> Static
 79 | closureIndex c = case c of { ClosureF b _ _ -> case b of { StaticBody s -> s ; _ -> error "closureIndex" } }
 80 | 
 81 | closureArity :: ClosureF -> Arity
 82 | closureArity c = case c of { ClosureF _ _ a -> a }
 83 | 
 84 | closureEnv :: ClosureF -> List Level
 85 | closureEnv c = case c of { ClosureF _ e _ -> e }
 86 | 
 87 | closureEnvSize :: ClosureF -> Int
 88 | closureEnvSize c = case c of { ClosureF _ e _ -> length e }
 89 | 
 90 | -- | A static function is an arity (which is separated to environment
 91 | -- size + actual lambda arity) together with a lifted body
 92 | data StatFun = StatFun Arity Arity Lifted deriving Show
 93 | 
 94 | statFunTotalArity :: StatFun -> Int
 95 | statFunTotalArity sfun = case sfun of { StatFun envsize arity _lifted -> plus envsize arity }
 96 | 
 97 | -- | A top-level definition is a name, a static index and a static function
 98 | data TopLev = TopLev (Named Static) StatFun deriving Show
 99 | 
100 | topLevTotalArity :: TopLev  -> Int
101 | topLevTotalArity toplev = case toplev of { TopLev _nidx sfun -> statFunTotalArity sfun }
102 | 
103 | --------------------------------------------------------------------------------
104 | 
105 | recogLamsT :: Term -> Pair (List Name) Term
106 | recogLamsT term = case term of
107 |   { LamT namedBody -> case namedBody of { Named name body ->
108 |       case recogLamsT body of { Pair names rhs -> Pair (Cons name names) rhs } }
109 |   ; _              -> Pair Nil term }
110 | 
111 | recogAppsF :: Lifted -> Pair Lifted (List Atom)
112 | recogAppsF term = case term of
113 |   { AppF tm1 v2  -> case recogAppsF tm1 of { Pair f args -> Pair f (snoc args v2) }
114 |   ; _            -> Pair term Nil }
115 | 
116 | -- recogLetsT :: Term -> Pair (List Term) Term
117 | -- recogLetsT term = case term of
118 | --   { LetT t1 t2   -> case recogLetsT t2 of { Pair ts body -> Pair (Cons t1 ts) body } 
119 | --   ; _            -> Pair Nil term }
120 | 
121 | --------------------------------------------------------------------------------
122 | -- ** Closure converting programs
123 | 
124 | -- | The (named) int list is the mapping from the source code top-level functions
125 | -- to the generated code top-level functions
126 | data LiftedProgram = LProgram 
127 |   { _toplevs :: List TopLev
128 |   , _indices :: List (Named Int)
129 |   , _main    :: Pair Int Lifted }
130 |   deriving Show 
131 | 
132 | coreProgramToLifted :: CoreProgram -> LiftedProgram
133 | coreProgramToLifted coreprg = case coreprg of { CorePrg blocks _mainIdx mainTerm -> let
134 |   { nstatic = length defins  
135 |   ; defins  = forgetBlockStructure blocks
136 |   ; action1 = sforM defins (\defin -> case located defin of { Defin name term -> 
137 |       sfmap (\i -> Named name (closureIndex i)) (termToStaticClosure name idSubs 0 term) })
138 |   ; action2 = closureConvert nanoMainIs idSubs 0 mainTerm  
139 |   ; action  = sliftA2 Pair action1 action2
140 |   ; mainidx = case findIndex (\def -> stringEq nanoMainIs (ldefinedName def)) defins of { Just i -> i ; Nothing -> error "main not found" }
141 |   } in case runState action Nil of { Pair toplist pair2 -> 
142 |          case pair2 of { Pair idxlist mainlft -> LProgram (reverse toplist) idxlist (Pair mainidx mainlft) } } }
143 | 
144 | --------------------------------------------------------------------------------
145 | -- ** Subsitutions and pruning
146 | 
147 | -- | Partial subsitutions
148 | type Subs = (Level -> Maybe Level)
149 | 
150 | idSubs :: Subs 
151 | idSubs = \j -> ifte (lt j 0) Nothing (Just j)
152 | 
153 | composeSubs :: Subs -> Subs -> Subs
154 | composeSubs subs1 subs2 = \j -> case subs1 j of { Nothing -> Nothing ; Just k -> subs2 k }
155 | 
156 | -- | Substitution which flips (reverses) the first @n@ things
157 | flipSubs :: Int -> Subs
158 | flipSubs n = \j -> ifte (lt j 0) Nothing (Just (ifte (lt j n) (minus n (inc j)) j))
159 | 
160 | -- | Apply a substitution
161 | applySubs :: Subs -> Level -> Level
162 | applySubs subs j = case subs j of { Just k -> k ; _ -> error "substitution: input not in domain" }
163 | 
164 | type PrunedEnv = List Level
165 | 
166 | pruningSubs :: Subs -> Level -> Level -> Term -> Pair PrunedEnv Subs
167 | pruningSubs oldsubs boundary level term = Pair pruned subs where
168 |   { pruned0  = pruneEnvironment boundary level term
169 |   ; pruned   = map (applySubs oldsubs) pruned0
170 |   ; npruned  = length pruned
171 |   ; envtable = zip pruned (range npruned)
172 |   ; subs j   = ifte (lt j boundary) (mapLookup (applySubs oldsubs j) envtable) 
173 |                                     (Just (plus (minus j boundary) npruned))
174 |   } 
175 | 
176 | -- | Figure out the part of the environment used by a term.
177 | -- Returns a set of /levels/
178 | pruneEnvironment :: Level -> Level -> Term -> IntSet
179 | pruneEnvironment boundary level term = go level term where
180 |   { goAtom level atom = case atom of
181 |     { VarA nvar -> goVar level (forgetName nvar)
182 |     ; ConA _    -> setEmpty
183 |     ; KstA _    -> setEmpty }
184 |   ; goVar level var = case var of 
185 |       { IdxV idx  -> goLevel (minus level (inc idx))
186 |       ; LevV jdx  -> goLevel jdx
187 |       ; TopV _    -> setEmpty 
188 |       ; StrV _    -> setEmpty } 
189 |   ; goLevel j = ifte (lt j boundary) (setSingleton j) setEmpty 
190 |   ; go level term = case term of
191 |     { AtmT atom   -> goAtom level atom
192 |     ; AppT t1 a2  -> setUnion (go level t1) (goAtom level a2)
193 |     ; PriT _ args -> setUnions (map (goAtom level) args)
194 |     ; LZPT _ args -> setUnions (map (go     level) args)
195 |     ; LamT nbody  -> go (inc level) (forgetName nbody)
196 |     ; RecT n ts t -> let { level' = plus level n } in  setUnions (Cons (go level' t) (map (\t -> go level' (forgetName t)) ts))
197 |     ; LetT nt1 t2 -> setUnion (go level (forgetName nt1)) (go (inc level) t2)
198 |     ; CasT v brs  -> setUnion (goAtom level (located v)) (setUnions (map (goBranch level) brs)) }
199 |   ; goBranch level branch = case branch of
200 |     { BranchT _ n rhs -> go (plus level n) rhs
201 |     ; DefaultT    rhs -> go       level    rhs } }
202 | 
203 | --------------------------------------------------------------------------------
204 | -- ** Closure conversion
205 | 
206 | -- | The closure conversion monad. Note: the list is reverse order!
207 | type ClosM a = State (List TopLev) a
208 | 
209 | -- | Take the head entry of top level lists, and add 1 to it's index
210 | nextStatic :: ClosM Static
211 | nextStatic = sbind sget (\list -> case list of { Nil -> sreturn 0 ; Cons toplev _ ->
212 |   case toplev of { TopLev named _ -> case named of { Named name s -> sreturn (inc s) }}})
213 | 
214 | -- addTopLev :: TopLev -> ClosM Unit
215 | -- addTopLev what = smodify (Cons what)
216 | 
217 | addStatFun :: Name -> StatFun -> ClosM Static
218 | addStatFun name statfun =
219 |   sbind nextStatic                                             (\statidx ->
220 |   sbind (smodify (Cons (TopLev (Named name statidx) statfun))) (\_       -> sreturn statidx ))
221 | 
222 | -- | The closure environment maps original levels to pruned de Bruijn indices
223 | -- relative to the boundary
224 | type ClosEnv = Map Level Idx
225 | 
226 | -- | A multi-lambda
227 | data LambdaT
228 |   = LambdaT Int Term
229 |   deriving Show
230 | 
231 | -- | Closure-converting a lambda results in 1) a closure allocation
232 | -- and 2) a static function. The latter we just add to the list of
233 | -- of compiled stuff
234 | --
235 | -- The name is just a name for the resulting top-level definition
236 | -- (so debugging is easier), and the level is the level where the lambda
237 | -- starts (the \"boundary\" level)
238 | --
239 | lambdaToClosure :: Name -> Subs -> Level -> LambdaT -> ClosM ClosureF
240 | lambdaToClosure name oldsubs boundary lambda = case lambda of { LambdaT nargs body -> 
241 |   let { newlevel = plus boundary nargs } in 
242 |     case pruningSubs oldsubs boundary newlevel body of { Pair prunedEnv newsubs ->
243 |       let { npruned = length prunedEnv ; ntotal = plus npruned nargs 
244 |           ; newsubs' = composeSubs newsubs (flipSubs ntotal) }
245 |       in  sbind (closureConvert name newsubs' newlevel body)        (\statbody ->
246 |           sbind (addStatFun name (StatFun npruned nargs statbody))  (\statidx  ->
247 |           sreturn (ClosureF (StaticBody statidx) prunedEnv nargs)))  }}
248 | 
249 | termToStaticClosure :: Name -> Subs -> Level -> Term -> ClosM ClosureF
250 | termToStaticClosure name subs level tm = case recogLamsT tm of { Pair args body -> 
251 |   lambdaToClosure name subs level (LambdaT (length args) body) }
252 | 
253 | -- Note: we don't do environment pruning here (boundary = 0)
254 | termToInlineClosure :: Name -> Subs -> Level -> Term -> ClosM ClosureF
255 | termToInlineClosure name subs level tm = 
256 |   sbind (closureConvert name subs level tm) (\lifted ->
257 |   sreturn (ClosureF (InlineBody lifted) Nil 0))
258 | 
259 | -- | Whether to make a static function out of this closure
260 | doInlineClosure :: Term -> Bool
261 | doInlineClosure tm = case tm of
262 |   { LamT _  -> False
263 |   ; AtmT _  -> True
264 |   ; _       -> le (termSize tm) 64 }  
265 | 
266 | -- | In let bindings, it can happen that we bind some computation.
267 | -- Since on the stack there should be only values, in this case
268 | -- we have to evaluate them. On the other hand, for example lambdas and 
269 | -- atoms do not need to be evaluated.
270 | needsToBeEvaluated :: Term -> Bool
271 | needsToBeEvaluated tm = case tm of
272 |   { LamT _        -> False 
273 |   ; AtmT _        -> False
274 |   ; LetT _   body -> needsToBeEvaluated body
275 |   ; RecT _ _ body -> needsToBeEvaluated body
276 |   ; _             -> True }
277 | 
278 | termToClosure :: Name -> Subs -> Level -> Term -> ClosM ClosureF
279 | termToClosure name subs level term = ifte (doInlineClosure term) 
280 |   (termToInlineClosure name subs level term) 
281 |   (termToStaticClosure name subs level term)
282 | 
283 | addPrefix :: Name -> Name -> Name
284 | addPrefix prefix name = append3 prefix "/" name
285 | 
286 | closureConvert :: Name -> Subs -> Level -> Term -> ClosM Lifted
287 | closureConvert nameprefix subs level term = go level term where
288 |   { prefixed name = addPrefix nameprefix name
289 |     -- convert to de Bruijn levels from indices
290 |   ; goLev        jdx  = LevV (applySubs subs                   jdx  )
291 |   ; goIdx  level idx  = LevV (applySubs subs (minus level (inc idx)))
292 |   ; goAtom level atom = case atom of 
293 |       { VarA named -> case named of { Named name var -> case var of
294 |          { IdxV idx -> VarA (Named name (goIdx level idx)) 
295 |          ; LevV jdx -> VarA (Named name (goLev       jdx))
296 |          ; _        -> atom } }
297 |       ; _  -> atom }
298 |   ; go level term = case term of
299 |     { AtmT named    -> sreturn (AtmF (goAtom level named))
300 |     ; AppT t1 a2    -> sliftA2 AppF (go level t1) (sreturn (goAtom level a2))
301 |     ; PriT pri args -> sreturn (PriF pri  ( map  (goAtom level) args))
302 |     ; LZPT pri args -> sfmap   (LZPF pri) (smapM (go     level) args)
303 |     ; LamT _        -> case recogLamsT term of { Pair args body ->
304 |                          sfmap LamF (lambdaToClosure (prefixed "lambda")  subs level (LambdaT (length args) body)) }
305 |     ; CasT lv brs   -> sfmap (CasF (lfmap (goAtom level) lv)) (smapM (goBranch level) brs)
306 |     ; RecT n nts tm -> let { level' = plus level n }
307 |                        in  sliftA2 (RecF n) (smapM (goRec1 level') nts) (go level' tm) 
308 |     ; LetT ntm body -> case ntm of { Named name tm -> sliftA2 (LetF (needsToBeEvaluated tm)) 
309 |                          (termToClosure (prefixed name) subs level tm) (go (inc level) body) } }
310 |   ; goRec1 level named = case named of { Named name tm -> 
311 |       sfmap (Named name) (termToStaticClosure (prefixed name) subs level tm) }
312 |   ; goBranch level br  = case br of
313 |     { DefaultT        rhs -> sfmap (DefaultF       ) (termToClosure   (prefixed "default")      subs level            rhs ) 
314 |     ; BranchT named n rhs -> sfmap (BranchF named n) (lambdaToClosure (prefixed (nameOf named)) subs level (LambdaT n rhs))
315 |     }}
316 | 
317 | --------------------------------------------------------------------------------
318 | 


--------------------------------------------------------------------------------
/Containers.hs:
--------------------------------------------------------------------------------
  1 | -- | Containers
  2 | 
  3 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  4 | {-# LANGUAGE Strict #-}
  5 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  6 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  7 | 
  8 | module Containers where
  9 | 
 10 | --------------------------------------------------------------------------------
 11 | 
 12 | import Prelude ( Int , Char , Eq , Show )
 13 | import PrimGHC
 14 | 
 15 | --------------------------------------------------------------------------------
 16 | 
 17 | import Base
 18 | 
 19 | {-% include "Base.hs" %-}
 20 | 
 21 | --------------------------------------------------------------------------------
 22 | -- * Containers
 23 | -- ** Sets
 24 | 
 25 | -- | We model sets as sorted lists. We would need an Ord instance so we only
 26 | -- do it for Int-s.
 27 | type IntSet = List Int
 28 | 
 29 | setEmpty :: IntSet
 30 | setEmpty = Nil
 31 | 
 32 | setMember :: Int -> IntSet -> Bool
 33 | setMember k set = elem k set
 34 | 
 35 | setSingleton :: Int -> IntSet
 36 | setSingleton x = Cons x Nil
 37 | 
 38 | setInsert :: Int -> IntSet -> IntSet
 39 | setInsert k set = go set where
 40 |   { go set = case set of { Nil -> Cons k Nil ; Cons x xs -> case compare k x of
 41 |     { LT -> Cons k set ; EQ -> set ; GT -> Cons x (go xs) } } }
 42 | 
 43 | setDelete :: Int -> IntSet -> IntSet
 44 | setDelete k = go where
 45 |   { go set = case set of { Nil -> Nil ; Cons x xs -> case compare k x of
 46 |     { LT -> set ; EQ -> xs ; GT -> Cons x (go xs) } } }
 47 | 
 48 | setInsertMany :: List Int -> IntSet -> IntSet
 49 | setInsertMany ks set = foldl (flip setInsert) set ks
 50 | 
 51 | setDeleteMany :: List Int -> IntSet -> IntSet
 52 | setDeleteMany ks set = foldl (flip setDelete) set ks
 53 | 
 54 | setIntersect :: IntSet -> IntSet -> IntSet
 55 | setIntersect set1 set2 = filter (\x -> setMember x set2) set1
 56 | 
 57 | setIsDisjoint :: IntSet -> IntSet -> Bool
 58 | setIsDisjoint set1 set2 = andList (map (\x -> not (setMember x set2)) set1)
 59 | 
 60 | setUnion :: IntSet -> IntSet -> IntSet
 61 | setUnion set1 set2 = flipFoldr setInsert set1 set2
 62 | 
 63 | setUnions :: List IntSet -> IntSet
 64 | setUnions list = foldl setUnion setEmpty list
 65 | 
 66 | --------------------------------------------------------------------------------
 67 | -- ** Association maps (sorted lists of (key,value) pairs)
 68 | 
 69 | type Map k v = List (Pair k v)
 70 | 
 71 | mapEmpty :: Map k v
 72 | mapEmpty = Nil
 73 | 
 74 | mapSingleton :: k -> v -> Map k v
 75 | mapSingleton k v = Cons (Pair k v) Nil
 76 | 
 77 | -- | This can be used for sorting. Of course it's quadratic...
 78 | mapFromList :: List (Pair Int v) -> Map Int v
 79 | mapFromList list = foldl g mapEmpty list where 
 80 |   { g old pair = case pair of { Pair k v -> mapInsert k v (const v) old } }
 81 | 
 82 | -- mapLookup :: Eq k => k -> Map k v -> Maybe v
 83 | mapLookup :: Int -> Map Int v -> Maybe v
 84 | mapLookup key list = go list where
 85 |   { go ls = case ls of
 86 |     { Nil -> Nothing ; Cons this rest -> case this of
 87 |       { Pair k v -> ifte (eq k key) (Just v) (go rest) } } }
 88 | 
 89 | -- mapDelete :: Eq k => k -> Map k v -> Map k v
 90 | mapDelete :: Int -> Map Int v -> Map Int v
 91 | mapDelete key list = go list where
 92 |   { go kvs = case kvs of { Nil -> Nil ; Cons kv rest -> case kv of
 93 |     { Pair k v -> ifte (eq k key) rest (Cons kv (go rest)) } } }
 94 | 
 95 | -- | For proper insertion we need ordering, but we only have that for Ints...
 96 | mapInsert :: Int -> v -> (v -> v) -> Map Int v -> Map Int v
 97 | mapInsert key y f list = go list where
 98 |   { go kvs = case kvs of { Nil -> Cons (Pair key y) Nil ; Cons kv rest -> case kv of
 99 |     { Pair k v -> case compare k key of
100 |       { LT -> Cons kv               (go rest)
101 |       ; EQ -> Cons (Pair k   (f v))     rest
102 |       ; GT -> Cons (Pair key y    )     kvs } } } }
103 | 
104 | mapAdjust :: Int -> (v -> v) -> Map Int v -> Map Int v
105 | mapAdjust key f list = go list where
106 |   { go kvs = case kvs of { Nil -> Nil ; Cons kv rest -> case kv of
107 |     { Pair k v -> case compare k key of
108 |       { LT -> Cons kv               (go rest)
109 |       ; EQ -> Cons (Pair k   (f v))     rest
110 |       ; GT -> kvs } } } }
111 | 
112 | --------------------------------------------------------------------------------
113 | -- ** Trie
114 | 
115 | data Trie a = Node (Maybe a) (Map Int (Trie a)) deriving Show
116 | 
117 | trieEmpty :: Trie a
118 | trieEmpty = Node Nothing Nil
119 | 
120 | trieSingleton :: String -> a -> Trie a
121 | trieSingleton str y = case str of { Nil -> Node (Just y) mapEmpty
122 |   ; Cons x xs -> Node Nothing (mapSingleton (ord x) (trieSingleton xs y)) }
123 | 
124 | trieLookup :: String -> Trie a -> Maybe a
125 | trieLookup str trie = case trie of { Node mb table -> case str of { Nil -> mb
126 |   ; Cons x xs -> case mapLookup (ord x) table of { Nothing -> Nothing
127 |     ; Just trie' -> trieLookup xs trie' } } }
128 | 
129 | trieLookupDefault :: a -> String -> Trie a -> a
130 | trieLookupDefault def str trie = case trie of { Node mb table -> case str of 
131 |   { Nil -> case mb of { Just y -> y ; Nothing -> def } 
132 |   ; Cons x xs -> case mapLookup (ord x) table of { Nothing -> def
133 |     ; Just trie' -> trieLookupDefault def xs trie' } } }
134 | 
135 | trieMember :: String -> Trie a -> Bool
136 | trieMember str trie = isJust (trieLookup str trie)
137 | 
138 | trieInsert :: String -> a -> Trie a -> Trie a
139 | trieInsert string y trie = go string trie where
140 |   { go str trie = case trie of { Node mb table -> case str of
141 |     { Nil -> Node (Just y) table
142 |     ; Cons x xs -> Node mb (mapInsert (ord x) (trieSingleton xs y) (go xs) table) } } }
143 | 
144 | -- | throws an exception if the key already exists
145 | trieInsertNew :: String -> String -> a -> Trie a -> Trie a
146 | trieInsertNew errmsg string y trie = go string trie where
147 |   { go str trie = case trie of { Node mb table -> case str of
148 |     { Nil -> case mb of { Nothing -> Node (Just y) table ; _ -> error errmsg }
149 |     ; Cons x xs -> Node mb (mapInsert (ord x) (trieSingleton xs y) (go xs) table) } } }
150 | 
151 | trieDelete :: String -> Trie a -> Trie a
152 | trieDelete str trie = case trie of { Node mb table -> case str of
153 |   { Nil -> Node Nothing table
154 |   ; Cons x xs -> Node mb (mapAdjust (ord x) (trieDelete xs) table) } }
155 | 
156 | trieAlter :: (Maybe a -> Maybe a) -> String -> Trie a -> Trie a 
157 | trieAlter f string trie = case f (trieLookup string trie) of
158 |   { Nothing -> trieDelete string   trie
159 |   ; Just y  -> trieInsert string y trie }
160 | 
161 | trieUnion :: Trie a -> Trie a -> Trie a
162 | trieUnion trie1 trie2 = go trie2 (trieToList trie1) where
163 |   { go trie list = case list of { Nil -> trie ; Cons pair rest -> case pair of { Pair key val ->
164 |       go (trieInsert key val trie) rest }}}
165 | 
166 | trieUnions :: List (Trie a) -> Trie a 
167 | trieUnions list = case list of { Nil -> trieEmpty ; Cons this rest -> case rest of 
168 |   { Nil -> this ; _ -> trieUnion this (trieUnions rest) }}
169 | 
170 | trieFromList :: List (Pair String a) -> Trie a
171 | trieFromList list = foldr f trieEmpty list where { f kv trie = case kv of { Pair k v -> trieInsert k v trie } }
172 | 
173 | -- | throws an exception if there is a duplicate key
174 | trieFromListUnique :: (String -> String) -> List (Pair String a) -> Trie a
175 | trieFromListUnique errmsg list = foldr f trieEmpty list where { f kv trie = case kv of { Pair k v -> trieInsertNew (errmsg k) k v trie } } 
176 | 
177 | trieBuild :: (a -> b) -> (a -> b -> b) -> List (Pair String a) -> Trie b
178 | trieBuild f g xys = foldl insert trieEmpty xys where
179 |   { insert old pair = case pair of { Pair k x -> trieAlter (h x) k old }
180 |   ; h x mb = case mb of { Nothing -> Just (f x) ; Just y -> Just (g x y) } }  
181 | 
182 | trieToList :: Trie a -> List (Pair String a)
183 | trieToList trie = go trie where { go trie = case trie of { Node mb table -> let
184 |   { f pair = case pair of { Pair k trie' -> map (prepend (chr k)) (go trie') }
185 |   ; rest = concat (map f table)
186 |   ; prepend x pair = case pair of { Pair xs y -> Pair (Cons x xs) y } }
187 |   in case mb of { Nothing -> rest ; Just y -> Cons (Pair Nil y) rest } } }
188 | 
189 | trieKeys :: Trie a -> List String
190 | trieKeys trie = map fst (trieToList trie)
191 | 
192 | --------------------------------------------------------------------------------
193 | -- ** Sets of strings
194 | 
195 | type TrieSet = Trie Unit
196 | 
197 | trieSetSingleton :: String -> TrieSet
198 | trieSetSingleton key = trieSingleton key Unit
199 | 
200 | trieSetInsert :: String -> TrieSet -> TrieSet
201 | trieSetInsert key set = trieInsert key Unit set
202 | 
203 | trieSetFromList :: List String -> TrieSet
204 | trieSetFromList list = trieFromList (map (\k -> Pair k Unit) list)
205 | 
206 | trieSetToList :: TrieSet -> List String
207 | trieSetToList set = map fst (trieToList set)
208 | 
209 | --------------------------------------------------------------------------------
210 | 


--------------------------------------------------------------------------------
/Core.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Core language
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module Core where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | 
 18 | import Base
 19 | import Types
 20 | import PrimOps
 21 | 
 22 | {-% include "Base.hs"     %-}
 23 | {-% include "Types.hs"    %-}
 24 | {-% include "PrimOps.hs"  %-}
 25 | 
 26 | --------------------------------------------------------------------------------
 27 | -- ** Terms
 28 | 
 29 | -- pattern VarT nvar = AtmT (VarA nvar)
 30 | -- pattern ConT ncon = AtmT (ConA ncon)
 31 | -- pattern KstT lit  = AtmT (KstA lit )
 32 | 
 33 | type LTerm = Located Term
 34 | 
 35 | data Term
 36 |   = AtmT Atom 
 37 |   | LamT (Named Term)
 38 |   | AppT Term Atom
 39 |   | PriT PrimOp (List Atom)
 40 |   | LZPT PrimOp (List Term)
 41 |   | LetT (Named Term) Term
 42 |   | RecT Int (List (Named Term)) Term
 43 |   | CasT LAtom (List BranchT)
 44 |   | MainT
 45 |   deriving Show
 46 | 
 47 | isLambdaTerm :: Term -> Bool
 48 | isLambdaTerm term = case term of { LamT _ -> True ; _ -> False }
 49 | 
 50 | data BranchT
 51 |   = BranchT (Named Con) Int Term
 52 |   | DefaultT Term
 53 |   deriving Show
 54 |  
 55 | -- | A list of top-level terms and the main (index and term, even though it's redundant)
 56 | data CoreProgram 
 57 |   = CorePrg (Program Term) TopIdx Term
 58 |   deriving Show
 59 | 
 60 | --------------------------------------------------------------------------------
 61 | 
 62 | showAtom :: Atom -> String
 63 | showAtom a = prettyAtom a
 64 | 
 65 | showTerm :: Term -> String
 66 | showTerm term = case term of
 67 |   { AtmT atom       -> showAtom atom
 68 |   ; LamT nbody      -> case nbody of { Named name body -> concat [ "(" , singleton backslashC , name , " -> " , showTerm body , ")" ] }
 69 |   ; AppT t a        -> concat [ "(" , showTerm t , " " , showAtom a , ")" ]
 70 |   ; PriT pop args   -> case pop of { PrimOp _ p -> concat [ showPrim p , "(" , intercalate "," (map showAtom args) , ")" ]} 
 71 |   ; LZPT pop args   -> case pop of { PrimOp _ p -> concat [ showPrim p , "(" , intercalate "," (map showTerm args) , ")" ]}
 72 |   ; LetT nt  body   -> concat [ "[let " , showNamedTerm nt , " in " , showTerm body , "]" ]
 73 |   ; RecT _ nts body -> concat [ "letrec { " , intercalate " ; " (map showNamedTerm nts) , " } in " , showTerm body ]
 74 |   ; CasT la brs     -> concat [ "case " , showAtom (located la) , " of { " ,intercalate " ; " (map showBranchT brs) , " }" ]
 75 |   ; MainT           -> "<main>" }
 76 | 
 77 | showTermList :: List Term -> String
 78 | showTermList tms = append3 "[" (intercalate "," (map showTerm tms)) "]"
 79 | 
 80 | showNamedTerm :: Named Term -> String
 81 | showNamedTerm nterm = case nterm of { Named n t -> append3 n " = " (showTerm t) }
 82 | 
 83 | showBranchT :: BranchT -> String
 84 | showBranchT branch = case branch of
 85 |   { BranchT ncon n rhs -> concat [ nameOf ncon , " " , intercalate " " (map (\i -> appendInt "a" i) (rangeFrom 1 n)) , " -> " , showTerm rhs ]
 86 |   ; DefaultT rhs -> append "_ -> " (showTerm rhs) }
 87 | 
 88 | --------------------------------------------------------------------------------
 89 | 
 90 | programToTerm :: Program Term -> Term
 91 | programToTerm blocks = go 0 blocks where
 92 |   { go k blocks = case blocks of { Nil -> MainT ; Cons this rest -> case this of 
 93 |       { Recursive    defins -> let { n = length defins ; k' = plus k n} in
 94 |                                RecT n (map (worker k') defins) (go      k' rest)
 95 |       ; NonRecursive defin  -> LetT   (     worker k   defin ) (go (inc k) rest) }}
 96 |   ; worker level0 ldefin = case ldefin of { Located loc defin -> case defin of
 97 |      { Defin name term -> Named name (transformVars f level0 term) }} 
 98 |      where { f _level var = case var of { IdxV i -> IdxV i 
 99 |                                         ; LevV j -> LevV (plus level0 j) 
100 |                                         ; TopV k -> LevV k 
101 |                                         ; _      -> var }}}
102 | 
103 | -- | it's important to eliminate levels because they can point to top-level definitions...?
104 | -- TODO: to be really correct, we should recognize top-level definitions...
105 | termToProgram :: Term -> Program Term
106 | termToProgram term = go 0 term where  
107 |   { go k term = case term of
108 |       { RecT n nts body -> let { k' = plus k n } in  
109 |                            Cons (Recursive     (map (worker k') nts)) (go      k' body)
110 |       ; LetT   nt  body -> Cons (NonRecursive  (     worker k   nt )) (go (inc k) body)
111 |       ; MainT -> Nil }
112 |   ; worker level0 named = case named of { Named name term -> fakeLocated (Defin name (transformVars f level0 term)) } where
113 |       { f level var = case var of { IdxV i -> g level (minus level (inc i)) ; LevV j -> g level j ; _ -> var }
114 |       ; g level jdx = ifte (lt jdx level0) (TopV jdx) (IdxV (minus level (inc jdx)))
115 |       }}
116 | 
117 | findToplevelMain :: Term -> TopIdx
118 | findToplevelMain term = go 0 term where
119 |   { go k term = case term of 
120 |     { RecT n _ body -> go (plus k n) body
121 |     ; LetT  nt body -> ifte (stringEq (nameOf nt) "main") k (go (inc k) body)
122 |     ; MainT         -> error "findToplevelMain: top level `main` not found" }}
123 | 
124 | --------------------------------------------------------------------------------
125 | 
126 | transformVars :: (Level -> Var -> Var) -> Level -> Term -> Term
127 | transformVars f = transformAtoms g where { g level atom = case atom of
128 |   { VarA nvar -> case nvar of { Named name var -> VarA (Named name (f level var)) } ; _ -> atom } }
129 | 
130 | transformAtoms :: (Level -> Atom -> Atom) -> Level -> Term -> Term
131 | transformAtoms f = transformTerm worker where
132 |   { worker level term = case term of
133 |     { AtmT a        -> AtmT (f level a)
134 |     ; AppT t a      -> AppT t (f level a)
135 |     ; PriT p as     -> PriT p (map (f level) as)
136 |     ; CasT a brs    -> CasT (lfmap (f level) a) brs 
137 |     ; _             -> term } }
138 | 
139 | transformTerm :: (Level -> Term -> Term) -> Level -> Term -> Term
140 | transformTerm f level term = go level term where 
141 |   { go level term = case term of
142 |     { AtmT a        -> f level (AtmT a)
143 |     ; LamT nt       -> f level (LamT (nfmap (go (inc level)) nt))
144 |     ; AppT t a      -> f level (AppT (go level t) a)
145 |     ; PriT p as     -> f level (PriT p as)
146 |     ; LZPT p ts     -> f level (LZPT p (map (go level) ts))
147 |     ; LetT nt1 t2   -> f level (LetT (nfmap (go level) nt1) (go (inc level) t2))
148 |     ; RecT n nts t  -> f level (let { level' = plus level n } in RecT n (map (nfmap (go level')) nts) (go level' t))
149 |     ; CasT la brs   -> f level (CasT la (map (goBranch level) brs)) 
150 |     ; MainT         -> f level MainT }
151 |   ; goBranch level branch = case branch of  
152 |     { BranchT c n t -> BranchT c n (go (plus level n) t)
153 |     ; DefaultT    t -> DefaultT    (go       level    t) } }
154 | 
155 | --------------------------------------------------------------------------------
156 | 
157 | -- | Shift de Bruijn indices in variables. We /add/ the offset to the indices,
158 | -- leaving levels unchanged.
159 | shiftVarRight :: Int -> Var -> Var
160 | shiftVarRight ofs var = case var of 
161 |    { IdxV i -> IdxV (plus i ofs) 
162 |    ; LevV j -> LevV j
163 |    ; _      -> var } 
164 | 
165 | -- | Shift de Bruijn indices in atoms
166 | shiftAtomRight :: Int -> Atom -> Atom
167 | shiftAtomRight ofs atom = case atom of  { VarA namedVar -> VarA (nfmap (shiftVarRight ofs) namedVar) ; _ -> atom }
168 | 
169 | -- | The condition should returin @True@ \"outside the term\"
170 | shiftVarConditional :: (Level -> Bool) -> Int -> Level -> Var -> Var
171 | shiftVarConditional cond ofs level var = case var of 
172 |    { IdxV i -> let { j = minus level (inc i) } in 
173 |                ifte (cond j)     (IdxV (plus i ofs)) var
174 |    ; LevV j -> ifte (cond j) var (LevV (plus j ofs)) 
175 |    ; _      -> var } 
176 | 
177 | shiftTerm :: Level -> Level -> Term -> Term
178 | shiftTerm oldlevel newlevel term = let { shift = minus newlevel oldlevel } in 
179 |   transformVars (\level var -> shiftVarConditional (\j -> lt j oldlevel) shift level var) oldlevel term
180 | 
181 | --------------------------------------------------------------------------------
182 | 
183 | atomIndexToLevel :: Level -> Atom -> Atom
184 | atomIndexToLevel level atom = case atom of { VarA nvar -> case nvar of { Named name var -> case var of
185 |   { IdxV i -> VarA (Named name (LevV (minus level (inc i)))) ; _ -> atom }} ; _ -> atom }
186 | 
187 | atomLevelToIndex :: Level -> Atom -> Atom
188 | atomLevelToIndex level atom = case atom of { VarA nvar -> case nvar of { Named name var -> case var of
189 |   { LevV j -> VarA (Named name (IdxV (minus level (inc j)))) ; _ -> atom }} ; _ -> atom }
190 | 
191 | termIndicesToLevels :: Level -> Term -> Term
192 | termIndicesToLevels level term = transformAtoms atomIndexToLevel level term 
193 | 
194 | termLevelsToIndices :: Level -> Term -> Term
195 | termLevelsToIndices level term = transformAtoms atomLevelToIndex level term 
196 | 
197 | --------------------------------------------------------------------------------
198 | 
199 | termSize :: Term -> Size
200 | termSize term = go term where 
201 |   { goNamed named = case named of { Named _ tm -> go tm }
202 |   ; go term = case term of
203 |     { AtmT _       -> 1
204 |     ; LamT nbody   -> inc (goNamed nbody)
205 |     ; AppT tm v    -> inc (go tm)
206 |     ; PriT _ _     -> 1
207 |     ; LZPT _ ts    -> inc (sum (map go ts))
208 |     ; LetT nt1 t2  -> inc (plus (goNamed nt1) (go t2))
209 |     ; RecT _ ls tm -> inc (plus (sum (map goNamed ls)) (go tm))
210 |     ; CasT _ brs   -> inc (sum (map goBranch brs)) }
211 |   ; goBranch br = case br of
212 |     { DefaultT tm    -> go tm
213 |     ; BranchT _ k tm -> plus k (go tm) } }
214 | 
215 | --------------------------------------------------------------------------------
216 | 


--------------------------------------------------------------------------------
/DataCon.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Data constructors
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module DataCon where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | 
 18 | import Base
 19 | import Containers
 20 | import Types
 21 | import Syntax
 22 | 
 23 | {-% include "Base.hs"       %-}
 24 | {-% include "Containers.hs" %-}
 25 | {-% include "Types.hs"      %-}
 26 | {-% include "Syntax.hs"     %-}
 27 | 
 28 | --------------------------------------------------------------------------------
 29 | -- ** Data constructors
 30 | 
 31 | isDCon :: Name -> Bool
 32 | isDCon name = isUpper (head name)
 33 | 
 34 | -- -- | Mapping constructor names to constructor tags
 35 | -- type DataConTable = Trie Con
 36 | 
 37 | con_False    = 0
 38 | con_True     = 1
 39 | con_Unit     = 2
 40 | con_Nil      = 3
 41 | con_Cons     = 4
 42 | con_Nothing  = 5
 43 | con_Just     = 6
 44 | con_Pair     = 7
 45 | con_ActionToken   = 8
 46 | con_ReadMode      = 9
 47 | con_WriteMode     = 10
 48 | con_AppendMode    = 11
 49 | con_ReadWriteMode = 12
 50 | 
 51 | type DConState = Pair Int DataConTable
 52 | 
 53 | initialDConState :: DConState
 54 | initialDConState = Pair 13 (trieFromList predefinedDataCons)
 55 | 
 56 | predefinedDataCons :: List (Pair String Int)
 57 | predefinedDataCons =
 58 |   [ Pair "False" con_False , Pair "True" con_True , Pair "Unit"    con_Unit    , Pair "Pair" con_Pair
 59 |   , Pair "Nil"   con_Nil   , Pair "Cons" con_Cons , Pair "Nothing" con_Nothing , Pair "Just" con_Just 
 60 |   , Pair "ReadMode"   con_ReadMode   , Pair "WriteMode"     con_WriteMode      , Pair "ActionToken" con_ActionToken
 61 |   , Pair "AppendMode" con_AppendMode , Pair "ReadWriteMode" con_ReadWriteMode ]
 62 | 
 63 | -- | Collect data constructors from the source.
 64 | --
 65 | -- Encoding of constructors tags:
 66 | --
 67 | -- *  0 = False
 68 | -- *  1 = True
 69 | -- *  2 = Unit
 70 | -- *  3 = Nil
 71 | -- *  4 = Cons
 72 | -- *  5 = Nothing
 73 | -- *  6 = Just
 74 | -- *  7 = Pair
 75 | -- *  8 = RealWorld
 76 | -- *  9 = ReadMode
 77 | -- * 10 = WriteMode
 78 | -- * 11 = AppendMode
 79 | -- * 12 = ReadWriteMode
 80 | -- * 13.. = user defined constructors
 81 | --
 82 | -- We need to fix @Int@, @Char@, @False@, @True@, @Unit@, @Nil@, @Cons@, @Just@, @Nothing@, @Pair@
 83 | -- and the file access modes because the primops use them
 84 | --
 85 | collectDataCons :: List (Defin Expr) -> DataConTable
 86 | collectDataCons defins = snd (execState action initialDConState) where
 87 |   { action = smapM_ collectDataConsWorker (map definedWhat defins) }
 88 | 
 89 | collectDataConsWorker :: Expr -> State DConState Unit
 90 | collectDataConsWorker expr = go expr where
 91 |   { insert name = sbind sget (\pair -> case pair of { Pair n table -> case trieLookup name table of
 92 |     { Just k  -> sreturn Unit
 93 |     ; Nothing -> sput (Pair (inc n) (trieInsert name n table)) } })
 94 |   ; go expr = case expr of
 95 |     { VarE lnam   -> let { name = located lnam } in case isDCon name of { False -> sreturn Unit ; True -> insert name }
 96 |     ; StrE k      -> sreturn Unit
 97 |     ; AppE e1 e2  -> sseq (go e1) (go e2)
 98 |     ; LamE _ body -> go body
 99 |     ; LetE defs body -> goLet defs body
100 |     ; RecE defs body -> goLet defs body
101 |     ; CaseE e branches -> sseq (lgo e) (smapM_ (\br -> case br of
102 |         { BranchE con _ rhs -> sbind (insert con) (\_ -> go rhs)
103 |         ; DefaultE      rhs -> go rhs }) branches)
104 |     ; LitE _     -> sreturn Unit
105 |     ; ListE list -> smapM_ go list
106 |     ; PrimE _ as -> smapM_ go as
107 |     } 
108 |   ; lgo lexpr = case lexpr of { Located loc expr -> sfmap (Located loc) (go expr) } 
109 |   ; goLet defs body = sseq 
110 |       (smapM_ (\defin -> case located defin of { Defin _ rhs -> go rhs }) defs ) (go body) }
111 | 
112 | --------------------------------------------------------------------------------
113 | 


--------------------------------------------------------------------------------
/Dependency.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Dependency analysis: Partitioning lets into recursive and non-recursive parts
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module Dependency where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | 
 18 | import Base
 19 | import Containers
 20 | import Types
 21 | -- import Core
 22 | import Syntax
 23 | 
 24 | {-% include "Base.hs"        %-}
 25 | {-% include "Containers.hs"  %-}
 26 | {-% include "Types.hs"       %-}
 27 | -- {-% include "Core.hs"        %-}
 28 | {-% include "Syntax.hs"      %-}
 29 | 
 30 | --------------------------------------------------------------------------------
 31 | -- * partition lets into recursive and non-recursive parts
 32 | 
 33 | data Let
 34 |   = Let1   LDefinE
 35 |   | LetRec (List LDefinE)
 36 |   deriving Show
 37 | 
 38 | isRecursiveDefin :: DefinE -> Bool
 39 | isRecursiveDefin def = case def of { Defin name rhs -> trieMember name (exprFreeVars rhs) }
 40 | 
 41 | mkLet :: List LDefinE -> Let
 42 | mkLet list = case mbSingleton list of 
 43 |   { Nothing  -> LetRec list
 44 |   ; Just def -> ifte (isRecursiveDefin (located def)) (LetRec (singleton def)) (Let1 def) }
 45 | 
 46 | checkForDuplicates :: List LDefinE -> a -> a
 47 | checkForDuplicates defins what = case trieFromListUnique duplicate (map (compose definToPair located) defins) of 
 48 |   { Node _ _ -> what } where { duplicate n  = concat [ "multiple declaration of " , quoteString n ] }
 49 | 
 50 | -- debug "graph" (trieToList graph) (debug "SCC" sccs (
 51 | partitionLets :: List LDefinE -> List Let
 52 | partitionLets ldefins = map (compose mkLet (map lkp)) sccs where
 53 |   { names = map (compose definedName located) ldefins
 54 |   ; isName n = stringElem n names
 55 |   ; graph = trieFromList (for ldefins (\ldef -> case ldef of { Located loc def -> case def of { Defin name rhs -> 
 56 |       Pair name (filter isName (trieSetToList (exprFreeVars rhs))) }} ))
 57 |   ; sccs = dependencyAnalysis (checkForDuplicates ldefins graph)
 58 |   ; defTrie = trieFromList (map ldefinToPair ldefins)
 59 |   ; lkp n = case trieLookup n defTrie of { Just locy -> case locy of { Located loc y -> Located loc (Defin n y) } 
 60 |       ; Nothing -> error "partitionLets: shouldn't happen" } }
 61 | 
 62 | -- | Top-level everything is letrec, but we still do the reordering.
 63 | reorderProgram :: List (LDefin Expr) -> Program Expr
 64 | reorderProgram list = worker (checkRecursiveLets (reorderLets (RecE list MainE))) where
 65 |   { worker expr = case expr of
 66 |     { LetE defs body -> append (map NonRecursive defs) (worker body)
 67 |     ; RecE defs body -> Cons          (Recursive defs) (worker body)
 68 |     ; MainE -> Nil }}
 69 | 
 70 | -- | check for \"lambdas only\" condition in letrecs
 71 | checkRecursiveLets :: Expr -> Expr
 72 | checkRecursiveLets expr = go expr where
 73 |   { go expr = case expr of
 74 |       { VarE _         -> expr
 75 |       ; AppE e1 e2     -> AppE (go e1) (go e2)
 76 |       ; LamE v body    -> LamE v (go body)
 77 |       ; LetE defs body -> LetE (map goDefin defs) (go body)
 78 |       ; RecE defs body -> let { bad = filter (\ldef -> not (isLambdaExpr (definedWhat (located ldef)))) defs } 
 79 |                           in  case bad of
 80 |                                 { Nil -> RecE (map goDefin defs) (go body)
 81 |                                 ; _   -> let { badwhat = map showNameAndLoc bad ; text = intercalate ", " badwhat }
 82 |                                 in  (error (append "recursive definitions must be lambdas: " text)) }                        
 83 |       ; CaseE what brs -> CaseE what (map goBranch brs)
 84 |       ; LitE  _        -> expr
 85 |       ; ListE list     -> ListE   (map go list)
 86 |       ; PrimE p list   -> PrimE p (map go list)
 87 |       ; StrE  _        -> expr
 88 |       ; MainE          -> expr }
 89 |   ; goDefin  ldefin = case ldefin of { Located loc defin -> case defin of 
 90 |       { Defin name rhs -> Located loc (Defin name (go rhs)) }}
 91 |   ; goBranch branch = case branch of 
 92 |       { DefaultE rhs         -> DefaultE (go rhs)
 93 |       ; BranchE con args rhs -> BranchE con args (go rhs) } }
 94 | 
 95 | reorderLets :: Expr -> Expr
 96 | reorderLets expr = go expr where
 97 |   { go expr = case expr of
 98 |     { VarE  _       -> expr
 99 |     ; LitE  _       -> expr
100 |     ; StrE  _       -> expr
101 |     ; AppE  e1 e2   -> AppE (go e1) (go e2)
102 |     ; LamE  n body  -> LamE n (go body)
103 |     ; LetE  defs e  -> LetE (map goDefin defs) (go e)
104 |     ; RecE  defs e  -> let { ps = partitionLets (map goDefin defs) } in letWorker ps (go e)
105 |     ; CaseE e brs   -> CaseE (lfmap go e) (map goBranch brs)
106 |     ; ListE es      -> ListE (map go es)
107 |     ; PrimE p es    -> PrimE p (map go es) 
108 |     ; MainE         -> MainE }
109 |   ; goDefin ldef = lfmap (\def -> case def of { Defin n rhs -> Defin n (go rhs) }) ldef
110 |   ; goBranch branch = case branch of 
111 |       { DefaultE rhs -> DefaultE (go rhs) 
112 |       ; BranchE con args rhs -> BranchE con args (go rhs) }
113 |   ; letWorker ls body = case ls of { Nil -> body ; Cons this rest -> case this of
114 |       { Let1   def1 -> LetE (singleton def1) (letWorker rest body)
115 |       ; LetRec defs -> RecE            defs  (letWorker rest body) }} }
116 | 
117 | --------------------------------------------------------------------------------
118 | -- * Dependency graphs
119 | 
120 | type Vtx = Name
121 | 
122 | type Graph = Trie (List Vtx)
123 | 
124 | graphToList :: Graph -> List (Pair Vtx Vtx)
125 | graphToList g = concat (map worker (trieToList g)) where
126 |   { worker pair = case pair of { Pair v ws -> map (\w -> Pair v w) ws } }
127 | 
128 | graphFromList :: List (Pair Vtx Vtx) -> Graph 
129 | graphFromList = trieBuild singleton Cons
130 | 
131 | -- | NB: if we naively flip by @graphFromList . map swap . graphToList@, we will
132 | -- lose vertices with no inbound edges! So we have to reinsert those 
133 | flipGraph :: Graph -> Graph
134 | flipGraph graph = insertKeys (graphFromList (map swap (graphToList graph))) where
135 |   { keys = trieKeys graph
136 |   ; insertKeys g = foldl (\old k -> trieAlter h k old) g keys
137 |   ; h mb = case mb of { Nothing -> Just Nil ; Just _ -> mb } }
138 | 
139 | dependencyAnalysis :: Graph -> List (List Vtx)
140 | dependencyAnalysis g = tarjanSCC (flipGraph g)
141 | 
142 | --------------------------------------------------------------------------------
143 | -- * Tarjan's topologically sorted SCC algorithm
144 | 
145 | -- | Note: the table @links@ consists of @(index,lowlink)@ pairs
146 | data Tarjan = Tarjan 
147 |   { _next   :: Int
148 |   , _stack  :: List Vtx 
149 |   , _links  :: Trie (Pair Int Int)  
150 |   , _output :: List (List Vtx)
151 |   }
152 |   deriving Show
153 | 
154 | -- | Based on <https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>
155 | tarjanSCC :: Graph -> (List (List Vtx))
156 | tarjanSCC graph = case execState (smapM_ worker vtxSet) iniState of { Tarjan _ _ _ out -> out } where
157 |   { iniState = Tarjan 0 [] trieEmpty []
158 |   ; vtxSet   = trieKeys graph
159 |   ; worker v = sbind sget (\state -> case state of { Tarjan next stack links out -> 
160 |       case trieLookup v links of { Nothing -> scc v ; _ -> sreturn Unit }})
161 |   -- scc :: Vtx -> State Tarjan Unit
162 |   ; scc v = sbind sget (\state -> case state of { Tarjan next stack links out -> 
163 |             sseq3 (sput (Tarjan (inc next) (Cons v stack) (trieInsert v (Pair next next) links) out))
164 |                   (successor v) (popAndOut v) })
165 |   -- successor :: Vtx -> State Tarjan Unit
166 |   ; successor v = sforM_ (trieLookupDefault Nil v graph) (\w -> 
167 |       sbind sget (\state -> case state of { Tarjan next stack links output ->
168 |         case trieLookup w links of
169 |           { Nothing -> sseq (scc w) 
170 |               (sbind sget (\state -> case state of { Tarjan next stack links output ->
171 |                 case trieLookup v links of { Just vpair -> case vpair of { Pair vi vl -> 
172 |                 case trieLookup w links of { Just wpair -> case wpair of { Pair _  wl -> 
173 |                   sput (Tarjan next stack (trieInsert v (Pair vi (min vl wl)) links) output) }}}}}))
174 |           ; Just pair -> case pair of { Pair wi wl -> swhen (stringElem w stack)
175 |                (case trieLookup v links of { Just vpair -> case vpair of { Pair vi vl -> 
176 |                 case trieLookup w links of { Just wpair -> case wpair of { Pair wi _  -> 
177 |                   sput (Tarjan next stack (trieInsert v (Pair vi (min vl wi)) links) output) }}}})}  }}))
178 |   -- popAndOut :: Vtx -> State Tarjan Unit
179 |   ; popAndOut v = sbind sget (\state -> case state of { Tarjan next stack links output ->
180 |       case trieLookup v links of { Just vpair -> case vpair of { Pair vi vl -> swhen (eq vi vl) 
181 |         (let { this    =       takeWhile (\x -> not (stringEq x v)) stack 
182 |              ; stack'  = tail (dropWhile (\x -> not (stringEq x v)) stack)
183 |              ; output' = Cons (Cons v this) output }
184 |         in sput (Tarjan next stack' links output') )}}}) }
185 | 
186 | --------------------------------------------------------------------------------
187 | 
188 | 


--------------------------------------------------------------------------------
/Eval.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | The evaluator (interpreter). 
  3 | --
  4 | -- Note: this is not needed for compilation, it's simply provided as an extra.
  5 | --
  6 | 
  7 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  8 | {-# LANGUAGE Strict #-}
  9 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
 10 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
 11 | 
 12 | module Eval where
 13 | 
 14 | --------------------------------------------------------------------------------
 15 | 
 16 | import Prelude ( Int , Char , Eq , Show )
 17 | import PrimGHC
 18 | 
 19 | --------------------------------------------------------------------------------
 20 | 
 21 | import Base
 22 | import Containers
 23 | import Types
 24 | import PrimOps
 25 | import DataCon
 26 | import Core
 27 | 
 28 | {-% include "Base.hs"       %-}
 29 | {-% include "Types.hs"      %-}
 30 | {-% include "Containers.hs" %-}
 31 | {-% include "PrimOps.hs"    %-}
 32 | {-% include "DataCon.hs"    %-}
 33 | {-% include "Core.hs"       %-}
 34 | 
 35 | --------------------------------------------------------------------------------
 36 | -- ** Runtime values
 37 | 
 38 | -- | Note: for recursive lets, we need some delaying mechanism, because
 39 | -- this is a strict language, so we don't have cyclic data structures.
 40 | --
 41 | -- The idea is that in case of a letrec, the first two argument of LamV
 42 | -- store the recursive part, and the third stores the rest of the environment.
 43 | data Value
 44 |   = IntV Int
 45 |   | ChrV Char
 46 |   | HdlV Handle
 47 |   | ConV Con (List Value)
 48 |   | LamV (Value -> EvalM Value)
 49 |   | IO_V Action
 50 | 
 51 | -- | This is a \"newtype\" only because the two layers of IO was really confusing...
 52 | data Action = Action (IO Value)
 53 | 
 54 | runAction :: Action -> IO Value
 55 | runAction action = case action of { Action m -> m }
 56 | 
 57 | showValue_ :: Value -> String
 58 | showValue_ value = showValue Nil value
 59 | 
 60 | showValue' :: StaticEnv -> Value -> String
 61 | showValue' statenv val = case statenv of { StaticEnv dconNames _ _ -> showValue dconNames val }
 62 | 
 63 | type DConNames = Map Int String
 64 | 
 65 | showValue :: DConNames -> Value -> String
 66 | showValue conNames value = case value of
 67 |   { IntV n -> showInt n
 68 |   ; ChrV c -> quoteChar c
 69 |   ; HdlV _ -> "<file handle>"
 70 |   ; ConV con args ->
 71 |       let { tag = case mapLookup con conNames of { Just name -> name ; Nothing -> appendInt "Con#" con } }
 72 |       in  case args of { Nil -> tag ; Cons _ _ -> parenString (unwords (Cons tag (map (showValue conNames) args))) }
 73 |   ; LamV _ -> "<lambda>"
 74 |   ; IO_V _ -> "<IO action>"
 75 |   }
 76 | 
 77 | printValue :: DConNames -> Value -> IO Unit
 78 | printValue conNames x = putStrLn (showValue conNames x)
 79 | 
 80 | eqValue :: Value -> Value -> Bool
 81 | eqValue val1 val2 = case val1 of
 82 |   { IntV i1 -> case val2 of { IntV i2 ->  eq i1 i2 ; _ -> False }
 83 |   ; ChrV c1 -> case val2 of { ChrV c2 -> ceq c1 c2 ; _ -> False }
 84 |   -- ; HdlV h1 -> case val2 of { HdlV h2 -> heq h1 h2 ; _ -> False }
 85 |   ; ConV con1 args1 -> case val2 of
 86 |       { ConV con2 args2 -> and3 (eq con1 con2) (eq (length args1) (length args2))
 87 |                                 (andList (zipWith eqValue args1 args2))
 88 |       ; _ -> False }
 89 |   ; LamV _   -> False
 90 |   ; IO_V _   -> False
 91 |   }
 92 | 
 93 | --------------------------------------------------------------------------------
 94 | -- ** Marshalling values
 95 | 
 96 | boolToValue :: Bool -> Value
 97 | boolToValue b = ifte b (ConV con_True Nil) (ConV con_False Nil)
 98 | 
 99 | valueToBool :: Value -> Bool
100 | valueToBool val = case val of { ConV con args -> eq con con_True ; _ -> error "marshalling: not a boolean" }
101 | 
102 | intToValue :: Int -> Value
103 | intToValue = IntV
104 | 
105 | valueToInt :: Value -> Int
106 | valueToInt val = case val of { IntV x -> x ; _ -> error "marshalling: not an integer" }
107 | 
108 | charToValue :: Char -> Value
109 | charToValue = ChrV
110 | 
111 | maybeCharToValue :: Maybe Char -> Value
112 | maybeCharToValue mb = case mb of { Nothing -> ConV con_Nothing Nil ; Just c -> ConV con_Just (singleton (ChrV c)) }
113 | 
114 | valueToChar :: Value -> Char
115 | valueToChar val = case val of { ChrV c -> c ; _ -> error "marshalling: not a character" }
116 | 
117 | unitToValue :: Unit -> Value
118 | unitToValue Unit = ConV con_Unit Nil
119 | 
120 | valueToUnit :: Value -> Unit
121 | valueToUnit val = case val of { ConV con _ -> ifte (eq con con_Unit) Unit err ; _ -> err } where
122 |   { err = error "marshalling: not a unit" }
123 | 
124 | handleToValue :: Handle -> Value
125 | handleToValue = HdlV
126 | 
127 | valueToHandle :: Value -> Handle
128 | valueToHandle val = case val of { HdlV h -> h ; _ -> error "marshalling: not a handle" }
129 | 
130 | valueToIOMode :: Value -> IOMode
131 | valueToIOMode value = case value of
132 |   { ConV con args -> ifte (eq con con_ReadMode      ) ReadMode      ( 
133 |                      ifte (eq con con_WriteMode     ) WriteMode     ( 
134 |                      ifte (eq con con_AppendMode    ) AppendMode    ( 
135 |                      ifte (eq con con_ReadWriteMode ) ReadWriteMode (err value))))
136 |   ; _ -> err value } where { err v = error (append "marshalling: not an IO mode: " (quoteString (showValue_ v))) }
137 |                           
138 | listToValue :: List Value -> Value
139 | listToValue list = case list of { Nil -> ConV con_Nil Nil
140 |   ; Cons x xs -> ConV con_Cons [ x , listToValue xs ] }
141 | 
142 | valueToList :: Value -> List Value
143 | valueToList value = case value of
144 |   { ConV con args -> ifte (neq con con_Cons) Nil
145 |       (case mbPair args of { Nothing -> Nil ; Just pair -> case pair of
146 |         { Pair u v -> Cons u (valueToList v) } } ) }
147 | 
148 | stringToValue :: String -> Value
149 | stringToValue = compose listToValue (map charToValue)
150 | 
151 | valueToString :: Value -> String
152 | valueToString = compose (map valueToChar) valueToList
153 | 
154 | maybeStringToValue :: Maybe String -> Value
155 | maybeStringToValue mb = case mb of { Nothing -> ConV con_Nothing Nil ; Just s -> ConV con_Just (singleton (stringToValue s)) }
156 | 
157 | literalToValue :: Literal -> Value
158 | literalToValue lit = case lit of
159 |   { IntL k       -> IntV k
160 |   ; ChrL c       -> ChrV c
161 |   ; StrL s       -> stringToValue s }
162 | 
163 | --------------------------------------------------------------------------------
164 | -- ** The environment
165 | 
166 | -- | The environment. We need some hacking to handle letrec blocks - as we cannot have cyclic data structure, we have to
167 | -- do something else. So we just simulate the stack allocation structure.
168 | data Env 
169 |   = NilEnv
170 |   | ConsEnv Value Env
171 |   | RecEnv  Int (List Term) Env
172 | 
173 | indexEnv :: StaticEnv -> Int -> Env -> Value
174 | indexEnv statEnv k env = go k env where 
175 |   { go k env = case env of
176 |     { NilEnv             -> error "indexEnv: indexing out of bounds"
177 |     ; ConsEnv val   rest -> ifte (eq k 0)  val                                       (go (dec   k  ) rest)
178 |     ; RecEnv  n tms rest -> ifte (lt k n) (mkLamV statEnv env (unLam (index k tms))) (go (minus k n) rest) }
179 |   ; unLam tm = case tm of { LamT nt -> forgetName nt ; _ -> error "indexEnv: fatal: recursive binding must be lambda" } }
180 | 
181 | lengthEnv :: Env -> Int
182 | lengthEnv env = case env of { NilEnv -> 0 ; ConsEnv _ rest -> inc (lengthEnv rest) ; RecEnv n _ rest -> plus n (lengthEnv rest) }
183 | 
184 | -- -- | for debugging only
185 | -- showEnv :: Env -> String
186 | -- showEnv env = Cons '[' (go env) where { go env = case env of
187 | --   { NilEnv            -> "]" 
188 | --   ; ConsEnv v rest    -> append3 (showValue_ v)     " : " (go rest)  
189 | --   ; RecEnv _ tms rest -> append3 (showTermList tms) " : " (go rest) }}
190 | -- 
191 | -- -- | for debugging only
192 | -- printEnv :: Env -> IO Unit
193 | -- printEnv env = go 0 env where { go k env = case env of
194 | --   { NilEnv            -> ioret_ 
195 | --   ; ConsEnv v rest    -> ioseq (putStrLn (append3 (showInt k) " => " (showValue_     v))) (go (inc  k)   rest)
196 | --   ; RecEnv n tms rest -> ioseq (putStrLn (append3 (showInt k) " => " (showTermList tms))) (go (plus k n) rest) }}
197 | 
198 | --------------------------------------------------------------------------------
199 | -- ** The evaluator
200 | 
201 | -- | The static execution environment: data constructors, string constants, index of @main@
202 | data StaticEnv = StaticEnv 
203 |   { _dcons   :: Map Int String
204 |   , _strLits :: List String
205 |   , _mainIdx :: TopIdx 
206 |   } 
207 |   deriving Show
208 | 
209 | pushEnv1 :: Value -> Env -> Env
210 | pushEnv1 value env = ConsEnv value env
211 | 
212 | pushEnvMany :: List Value -> Env -> Env
213 | pushEnvMany values env = case values of { Nil -> env ; Cons v vs -> pushEnvMany vs (pushEnv1 v env) }
214 | 
215 | pushEnvRec :: Int -> List Term -> Env -> Env
216 | pushEnvRec n tms env = RecEnv n tms env
217 | 
218 | evalVar :: StaticEnv -> Env -> Var -> Value
219 | evalVar statEnv env var = case var of
220 |   { IdxV idx -> indexEnv statEnv idx env
221 |   ; StrV k   -> case statEnv of { StaticEnv _dcons strlits _main -> stringToValue (index k strlits) }
222 |   ; TopV _   -> error "evalVar: top-level references shouldn't appear in terms"
223 |   ; LevV _   -> error "evalVar: de Bruijn levels shouldn't appear in terms" }
224 | 
225 | evalAtom :: StaticEnv -> Env -> Atom -> Value
226 | evalAtom statEnv env atom = case atom of 
227 |   { VarA var -> evalVar statEnv env (forgetName var)
228 |   ; KstA lit -> literalToValue lit
229 |   ; ConA con -> ConV (forgetName con) Nil }
230 | 
231 | mkLamV :: StaticEnv -> Env -> Term -> Value
232 | mkLamV statEnv env tm = LamV (\x -> eval statEnv (pushEnv1 x env) tm)
233 | 
234 | -- eval :: StaticEnv -> Env -> Term -> IO Value
235 | -- eval statEnv env term = 
236 | --   ioseq (iosequence_
237 | --   [ putStrLn (append ">> env  = " (showEnv env))
238 | --   , putStrLn (append ">> term = " (showTerm term))
239 | --   , putStrLn "" 
240 | --   ])
241 | --   (eval' statEnv env term)
242 | 
243 | type EvalM a = IO a
244 | 
245 | eval :: StaticEnv -> Env -> Term -> EvalM Value
246 | eval statEnv env term = case term of
247 |   { AtmT atom    -> ioreturn (evalAtom statEnv env atom)
248 |   ; AppT e1 a2   -> iobind (eval statEnv env e1) (\val1 -> case val1 of
249 |      { ConV con args  -> ioreturn (ConV con (snoc args (evalAtom statEnv env a2)))
250 |      ; LamV fun       -> let { val2 = evalAtom statEnv env a2 } in fun val2
251 |      ; _              -> error "eval: application to non-lambda" })
252 |   ; PriT primop vs -> case primop of { PrimOp _arity prim -> case statEnv of
253 |       { StaticEnv dconNames _ _ -> evalPrimM dconNames prim (map (evalAtom statEnv env) vs) }}
254 |   ; LZPT primop ts -> case primop of { PrimOp _arity prim -> case prim of
255 |      { IFTE -> ternary ts (lazyIFTE statEnv env)
256 |      ; And  -> binary  ts (lazyAnd  statEnv env)
257 |      ; Or   -> binary  ts (lazyOr   statEnv env)
258 |      ; _    -> error "eval: unrecognized lazy primop" }}
259 |   ; LetT nt1 t2   -> iobind (eval statEnv env (forgetName nt1)) (\x -> eval statEnv (pushEnv1 x env) t2)
260 |   ; LamT body     -> ioreturn (mkLamV statEnv env (forgetName body)) 
261 |   ; CasT latom brs -> let { scrutinee = evalAtom statEnv env (located latom) } in case scrutinee of
262 |      { ConV con args -> matchCon statEnv env con args brs
263 |      ; _             -> error (concat [ "eval: branching on a non-constructor at " , showLocation (location latom), "; scrutinee = " , showValue' statEnv scrutinee ]) }
264 |   ; RecT n nts tm -> let { env' = pushEnvRec n (map forgetName nts) env } in eval statEnv env' tm
265 |   ; MainT         -> case statEnv of { StaticEnv _ _ mainidx -> 
266 |                        let { k = minus (lengthEnv env) (inc mainidx) }
267 |                        in (ioreturn (indexEnv statEnv k env)) }
268 |   } 
269 | 
270 | matchCon :: StaticEnv -> Env -> Con -> List Value -> List BranchT -> EvalM Value
271 | matchCon statEnv env con args brs = go brs where
272 |   { nargs = length args
273 |   ; go branches = case branches of
274 |     { Nil -> error "non-exhaustive patterns in case"
275 |     ; Cons this rest -> case this of
276 |       { DefaultT rhs            -> eval statEnv env rhs
277 |       ; BranchT bcon bnargs rhs -> ifte (and (eq con (forgetName bcon)) (eq nargs bnargs))
278 |           (eval statEnv (pushEnvMany args env) rhs)
279 |           (go rest) } } }
280 | 
281 | --------------------------------------------------------------------------------
282 | -- ** Evaluating pure primops
283 | 
284 | evalfunII :: (Int -> Int) -> Value -> Value
285 | evalfunII f v1 = intToValue (f (valueToInt v1))
286 | 
287 | evalfunIII :: (Int -> Int -> Int) -> Value -> Value -> Value
288 | evalfunIII f v1 v2 = intToValue (f (valueToInt v1) (valueToInt v2))
289 | 
290 | evalfunIIB :: (Int -> Int -> Bool) -> Value -> Value -> Value
291 | evalfunIIB f v1 v2 = boolToValue (f (valueToInt v1) (valueToInt v2))
292 | 
293 | evalfunBB :: (Bool -> Bool) -> Value -> Value
294 | evalfunBB f v1 = boolToValue (f (valueToBool v1))
295 | 
296 | evalfunBBB :: (Bool -> Bool -> Bool) -> Value -> Value -> Value
297 | evalfunBBB f v1 v2 = boolToValue (f (valueToBool v1) (valueToBool v2))
298 | 
299 | evalPrimPure :: Prim -> List Value -> Value
300 | evalPrimPure prim args = case prim of
301 |   { Error   -> unary   args (compose error valueToString)
302 |   ; Negate  -> unary   args (evalfunII  negate)
303 |   ; Plus    -> binary  args (evalfunIII plus  )
304 |   ; Minus   -> binary  args (evalfunIII minus )
305 |   ; Times   -> binary  args (evalfunIII times )
306 |   ; Div     -> binary  args (evalfunIII div   )
307 |   ; Mod     -> binary  args (evalfunIII mod   )
308 |   ; BitAnd  -> binary  args (evalfunIII bitAnd) 
309 |   ; BitOr   -> binary  args (evalfunIII bitOr )
310 |   ; BitXor  -> binary  args (evalfunIII bitXor ) 
311 |   ; ShiftL  -> binary  args (evalfunIII shiftL)
312 |   ; ShiftR  -> binary  args (evalfunIII shiftR)
313 |   ; Chr     -> unary   args (compose3 charToValue chr valueToInt )
314 |   ; Ord     -> unary   args (compose3 intToValue  ord valueToChar)
315 |   ; Not     -> unary   args (evalfunBB  not )
316 |   ; IntEQ   -> binary  args (evalfunIIB eq  )
317 |   ; IntLT   -> binary  args (evalfunIIB lt  )
318 |   ; IntLE   -> binary  args (evalfunIIB le  )
319 |   ; StdIn   -> nullary args (handleToValue stdin )
320 |   ; StdOut  -> nullary args (handleToValue stdout)
321 |   ; StdErr  -> nullary args (handleToValue stderr)
322 |   ; _       -> error (append "evalPrim: unimplemented primop: " (quoteString (showPrim prim)))
323 |   }
324 | --  ; GenEQ   -> binary  args (\x y -> boolToValue (eqValue x y))
325 | --  ; IFTE    -> error "ifte: this has to be implemented somewhere else because of lazyness"
326 | --  ; And     -> binary  args (evalfunBBB and )
327 | --  ; Or      -> binary  args (evalfunBBB or  )
328 | 
329 | --------------------------------------------------------------------------------
330 | -- ** Evaluating IO primops
331 | 
332 | ioCompose1 :: (c -> d) -> (b -> IO c) -> (a -> b) -> a -> IO d
333 | ioCompose1 post action pre = \x -> iofmap post (action (pre x))
334 | 
335 | ioCompose2 :: (c -> d) -> (b1 -> b2 -> IO c) -> (a1 -> b1) -> (a2 -> b2) -> a1 -> a2 -> IO d
336 | ioCompose2 post action pre1 pre2 = \x y -> iofmap post (action (pre1 x) (pre2 y))
337 | 
338 | -- | Note: since the evaluator is in the IO monad, since it can evaluate
339 | -- side effects, this also results in @(Value -> EvalM Value)@ instead 
340 | -- of @(Value -> Value)@
341 | valueToLambda :: Value -> (Value -> EvalM Value)
342 | valueToLambda val = case val of { LamV fun -> fun ; _ -> error "marshalling: not a lambda" }
343 | 
344 | actionToValue :: Action -> Value
345 | actionToValue action = IO_V action
346 | 
347 | valueToAction :: Value -> Action
348 | valueToAction val = case val of { IO_V action -> action ; _ -> error "marshalling: not an IO action" }
349 | 
350 | actionJoin :: IO Action -> Action
351 | actionJoin io = Action (iojoin (iofmap runAction io))
352 | 
353 | bindAction :: Action -> (Value -> Action) -> Action
354 | bindAction act1 fun2 = Action (iobind (runAction act1) (compose runAction fun2))
355 | 
356 | -- | Assuming the values are the right shape for @iobind@, we bind them
357 | ioBindValue :: Value -> Value -> Value
358 | ioBindValue actionVal funVal = actionToValue (bindAction a b) where
359 |   { a = valueToAction actionVal 
360 |   ; b = \val -> actionJoin (iofmap valueToAction (valueToLambda funVal val)) }
361 | 
362 | -- | Note: This has to be The environment is only used for @IOBind@, which takes a lambda as second argument.
363 | evalPrimM :: DConNames -> Prim -> List Value -> EvalM Value
364 | evalPrimM dconNames prim args = case prim of
365 |   { RunIO    -> runAction (unary  args valueToAction) 
366 |   ; _        -> ioreturn (evalPrimPureIO dconNames prim args) }
367 | 
368 | evalPrimPureIO :: DConNames -> Prim -> List Value -> Value 
369 | evalPrimPureIO dconNames prim args = case prim of
370 |   { GetChar  -> mkIO (nullary args (iofmap     maybeCharToValue   getChar )                             )
371 |   ; GetArg   -> mkIO (unary   args (ioCompose1 maybeStringToValue getArg   (compose inc2 valueToInt))   )
372 |   ; PutChar  -> mkIO (unary   args (ioCompose1 unitToValue        putChar  valueToChar  )               )
373 |   ; Exit     -> mkIO (unary   args (ioCompose1 unitToValue        exit     valueToInt   )               )
374 |   ; OpenFile -> mkIO (binary  args (ioCompose2 handleToValue      openFile valueToString valueToIOMode) )
375 |   ; HClose   -> mkIO (unary   args (ioCompose1 unitToValue        hClose   valueToHandle)               )    
376 |   ; HGetChar -> mkIO (unary   args (ioCompose1 maybeCharToValue   hGetChar valueToHandle)               )    
377 |   ; HPutChar -> mkIO (binary  args (ioCompose2 unitToValue        hPutChar valueToHandle valueToChar  ) )
378 |   ; HPutStr  -> mkIO (binary  args (ioCompose2 unitToValue        hPutStr  valueToHandle valueToString) )   
379 |   ; Print    -> mkIO (unary   args (ioCompose1 unitToValue        putStrLn (showValue dconNames))       )
380 |   ; IOReturn -> mkIO (unary   args ioreturn)
381 |   ; IOBind   -> binary args ioBindValue                      
382 |   ; _        -> evalPrimPure prim args
383 |   } where { mkIO ioaction = actionToValue (Action ioaction) }
384 | 
385 | --------------------------------------------------------------------------------
386 | -- *** Lazy primops
387 | 
388 | lazyIFTE :: StaticEnv -> Env -> Term -> Term -> Term -> IO Value
389 | lazyIFTE statEnv env tb tx ty = iobind (eval statEnv env tb) (\vb -> 
390 |   ifte (valueToBool vb) (eval statEnv env tx) (eval statEnv env ty))
391 | 
392 | lazyAnd :: StaticEnv -> Env -> Term -> Term -> IO Value
393 | lazyAnd statEnv env t1 t2 = iobind (eval statEnv env t1) (\v1 -> 
394 |   ifte (valueToBool v1) (eval statEnv env t2) (ioreturn (boolToValue False)))
395 | 
396 | lazyOr :: StaticEnv -> Env -> Term -> Term -> IO Value
397 | lazyOr statEnv env t1 t2 = iobind (eval statEnv env t1) (\v1 -> 
398 |   ifte (valueToBool v1) (ioreturn (boolToValue True)) (eval statEnv env t2))
399 | 
400 | --------------------------------------------------------------------------------
401 | 
402 | 


--------------------------------------------------------------------------------
/Inliner.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Inlining small functions + some basic optimizations
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module Inliner where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | 
 18 | import Base
 19 | import Containers
 20 | import Types
 21 | import Core
 22 | 
 23 | {-% include "Base.hs"       %-}
 24 | {-% include "Containers.hs" %-}
 25 | {-% include "Types.hs"      %-}
 26 | {-% include "Core.hs"       %-}
 27 | 
 28 | --------------------------------------------------------------------------------
 29 | -- ** Inliner
 30 | 
 31 | data InlineTm
 32 |   = NoInline
 33 |   | Inline Level Term
 34 |   deriving Show
 35 | 
 36 | type InlineEnv = List InlineTm
 37 | 
 38 | -- | temp debugging
 39 | data TermSize = TermSize Int Int deriving Show
 40 | 
 41 | -- | NB: we should never inline stuff which does computation!
 42 | isInlineableTerm :: Term -> Bool
 43 | isInlineableTerm term = case term of
 44 |   { LamT _   -> True
 45 |   ; AtmT _   -> True
 46 |   ; LetT _ t -> isInlineableTerm t
 47 |   ; _        -> False }
 48 |   
 49 | insertIfSmall :: Size -> Level -> Term -> InlineEnv -> InlineEnv
 50 | insertIfSmall limit level term scope = case isInlineableTerm term of
 51 |   { True  -> let { this = ifte (gt (termSize term) limit) NoInline (Inline level term) } in (Cons this scope)
 52 |   ; False -> Cons NoInline scope }
 53 | 
 54 | addNoInlines :: Int -> InlineEnv -> InlineEnv
 55 | addNoInlines n env = ifte (le n 0) env (addNoInlines (dec n) (Cons NoInline env))
 56 | 
 57 | optimizeCorePrg :: CoreProgram -> CoreProgram
 58 | optimizeCorePrg prg = let { limit = 24 } in 
 59 |   (inlineCorePrg limit (inlineCorePrg limit
 60 |   (inlineCorePrg limit (inlineCorePrg limit
 61 |   (inlineCorePrg limit (inlineCorePrg limit prg))))))
 62 | 
 63 | inlineCorePrg :: Size -> CoreProgram -> CoreProgram
 64 | inlineCorePrg sizeLimit coreprg = case coreprg of 
 65 |   { CorePrg blocks mainIdx mainTerm -> let 
 66 |      { bigterm'  = removeUnusedLets mainIdx (inlineLets sizeLimit (programToTerm blocks))
 67 |      ; mainIdx'  = findToplevelMain bigterm'
 68 |      ; blocks'   = termToProgram    bigterm'
 69 |      ; mainTerm' = AtmT (VarA (Named "$main" (TopV mainIdx')))
 70 |      } in CorePrg blocks' mainIdx' mainTerm' } 
 71 | 
 72 | inlineLets :: Size -> Term -> Term
 73 | inlineLets sizeLimit term = go 0 Nil term where
 74 |   { goIdx name level env i = let { what = index i env } in case what of 
 75 |       { NoInline -> AtmT (VarA (Named name (IdxV i))) 
 76 |       ; Inline ilevel iterm -> shiftTerm ilevel level iterm }
 77 |   ; goAtom level env atom = case atom of { VarA nvar -> case nvar of { Named name var -> case var of
 78 |       { LevV j   -> goIdx name level env (minus level (inc j)) 
 79 |       ; IdxV i   -> goIdx name level env i
 80 |       ; _        -> AtmT atom } } ; _ -> AtmT atom }
 81 |   ; go level env term = case term of
 82 |       { AtmT atom     -> goAtom level env atom
 83 |       ; LamT nt       -> LamT (nfmap (go (inc level) (Cons NoInline env)) nt)
 84 |       ; AppT t a      -> AppT (go level env t) a
 85 |       ; PriT p as     -> PriT p as
 86 |       ; LZPT p ts     -> LZPT p (map (go level env) ts)
 87 |       -- ; LetT nt1 t2   -> let { nt1' = (nfmap (go level env) nt1) 
 88 |       ; LetT nt1 t2   -> let { nt1' = nfmap (\t -> betaReduceTerm level (go level env t)) nt1 
 89 |                              ; env' = insertIfSmall sizeLimit level (forgetName nt1') env } 
 90 |                          in  LetT nt1' (go (inc level) env' t2)
 91 |       ; RecT n nts t  -> let { level' = plus level n ; env' = addNoInlines n env } 
 92 |                          in  RecT n (map (nfmap (go level' env')) nts) (go level' env' t)
 93 |       ; CasT la brs   -> CasT la (map (goBranch level env) brs) 
 94 |       ; MainT         -> MainT }
 95 |   ; goBranch level env branch = case branch of  
 96 |       { BranchT c n t -> BranchT c n (go (plus level n) (addNoInlines n env) t)
 97 |       ; DefaultT    t -> DefaultT    (go       level                    env  t) } }
 98 | 
 99 | --------------------------------------------------------------------------------
100 | 
101 | -- ex1 = LamT (Named "x" (
102 | --         LetT (Named "p" (AtmT (KstA (IntL 666))))
103 | --           (PriT (PrimOp 2 Plus) [ VarA (Named "p" (IdxV 0)) , KstA (IntL 1) ] )))
104 | -- 
105 | -- ex2 = LamT (Named "x" (
106 | --         LetT (Named "p" (AtmT (KstA (IntL 666))))
107 | --           (PriT (PrimOp 2 Plus) [ VarA (Named "x" (IdxV 1)) , KstA (IntL 1) ] )))
108 | 
109 | --------------------------------------------------------------------------------
110 | 
111 | -- | NB: we have to know the @main@, otherwise we would eliminate /everything/
112 | removeUnusedLets :: TopIdx -> Term -> Term
113 | removeUnusedLets mainIdx term = snd (go 0 term) where
114 |   { 
115 |  -- goLev :: Level ->  Name -> Level -> Pair IntSet (Named Var)
116 |     goLev level name j = Pair (setSingleton j) (Named name (LevV j)) 
117 |  -- goAtom :: Level -> Atom -> Pair IntSet Atom
118 |   ; goAtom level atom = case atom of { VarA nvar -> case nvar of { Named name var -> case var of
119 |       { LevV j   -> second VarA (goLev level name j)
120 |       ; IdxV i   -> second VarA (goLev level name (minus level (inc i)))
121 |       ; _        -> Pair setEmpty atom } } ; _ -> Pair setEmpty atom }
122 |  -- go :: Level -> Term -> Pair IntSet Term
123 |   ; go level term = 
124 |                     -- debug "level" level (debug "term" term (debug_ "term'" (
125 |                     case term of 
126 |       { AtmT atom      -> second AtmT (goAtom level atom)
127 |       ; LamT nt        -> case go (inc level) (forgetName nt) of { Pair set t' ->
128 |                             Pair (setDelete level set) (LamT (Named (nameOf nt) t')) }
129 |       ; AppT t a       -> case go     level t of { Pair set1 t' -> 
130 |                           case goAtom level a of { Pair set2 a' -> Pair (setUnion set1 set2) (AppT t' a') }}
131 |       ; PriT p as      -> let { pairs = map (goAtom level) as } in Pair (setUnions (map fst pairs)) (PriT p (map snd pairs)) 
132 |       ; LZPT p ts      -> let { pairs = map (go     level) ts } in Pair (setUnions (map fst pairs)) (LZPT p (map snd pairs)) 
133 |       ; LetT nt1 t2    -> case nt1 of { Named name t1 -> case go level t1 of { Pair free1 t1' ->
134 |                           case go (inc level) t2 of { Pair free2 t2' -> ifte (setMember level free2)
135 |                             (Pair (setUnion free1 (setDelete level free2)) (LetT (Named name t1') t2'))
136 |                             (Pair free2 (shiftTerm (inc level) level t2')) }}}
137 |       ; RecT n nts tm  -> let { level' = plus level n ; levels = rangeFrom level n 
138 |                               ; pairs  = map (\nt -> case nt of { Named named t -> case go level' t of 
139 |                                   { Pair set t' -> Pair set (Named named t') }}) nts } 
140 |                           in  case go level' tm of { Pair free2 tm' -> ifte (setIsDisjoint levels free2)
141 |                                 (Pair free2 (shiftTerm level' level tm'))                                
142 |                                    (Pair (setUnion free2 (setDeleteMany levels (setUnions (map fst pairs)))) 
143 |                                       (RecT n (map snd pairs) tm')) }
144 |       ; CasT la brs    -> case la of { Located loc a -> case goAtom level a of { Pair set1 a' -> 
145 |                           let { pairs = map (goBranch level) brs } 
146 |                           in  Pair (setUnion set1 (setUnions (map fst pairs))) (CasT (Located loc a') (map snd pairs)) }} 
147 |       ; MainT          -> Pair (setSingleton mainIdx) MainT } 
148 |                            -- (AtmT (VarA (Named "$main" (LevV mainIdx)))) } )))
149 |  -- goBranch :: Level -> BranchT -> Pair IntSet BranchT
150 |   ; goBranch level branch = case branch of
151 |       { DefaultT rhs    -> case go level rhs of { Pair set rhs' -> Pair set (DefaultT rhs') }
152 |       ; BranchT c n rhs -> let { level' = plus level n } in case go level' rhs of { Pair set rhs' -> 
153 |                              let { set' = setDeleteMany (rangeFrom level n) set } in Pair set' (BranchT c n rhs') }} }
154 | 
155 | --------------------------------------------------------------------------------
156 | -- ** Beta reduction
157 | 
158 | --_id    = LamT (Named "a" ( AtmT (VarA (Named "a" (IdxV 0)))))
159 | --_const = LamT (Named "x" (
160 | --         LamT (Named "y" ( AtmT (VarA (Named "x" (IdxV 1)))))))
161 | --_kst5 = (KstA (IntL 5))
162 | --_kst8 = (KstA (IntL 8))
163 | --_idx0 = VarA (Named "$0" (IdxV 0))
164 | --_idx1 = VarA (Named "$1" (IdxV 1))
165 | --_idx2 = VarA (Named "$2" (IdxV 2))
166 | --app1  f x    = AppT f x
167 | --app2 f x y = AppT (AppT f x) y
168 | 
169 | betaReduceTerm :: Level -> Term -> Term
170 | betaReduceTerm level term = transformTerm worker level term where
171 |   { worker level term = case term of { AppT f arg -> case f of { LamT nbody -> case nbody of { Named name body -> 
172 |       let { arg'  = atomIndexToLevel level arg
173 |           ; body' = transformAtoms (substitute1 level arg') (inc level) body 
174 |           ; final = shiftTerm (inc level) level body' }
175 |       in  final
176 |            -- debug "=========================" Unit (
177 |            -- debug "level" level( 
178 |            -- debug "f/arg" (Pair (showTerm f) (showAtom arg)) (
179 |            -- debug "body"  (showTerm body)  (
180 |            -- debug "arg'"  (showAtom arg' ) (
181 |            -- debug "body'" (showTerm body') (
182 |            -- debug "final" (showTerm final)  
183 |            --  final))))))
184 |            } ; _ -> term } ; _ -> term }
185 |   ; substitute1 level0 what level atom =  
186 |       let { handleLev name j = ifte (eq j level0) what (VarA (Named name (LevV j))) }
187 |       in  case atom of { VarA nvar -> case nvar of { Named name var -> case var of 
188 |             { IdxV i -> handleLev name (minus level (inc i))
189 |             ; LevV j -> handleLev name j
190 |             ; _ -> atom }} ; _ -> atom } }
191 | 
192 | --------------------------------------------------------------------------------
193 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2021, Balazs Komuves
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | - Redistributions of source code must retain the above copyright notice,
 8 | this list of conditions and the following disclaimer.
 9 |  
10 | - Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |  
14 | - Neither names of the copyright holders nor the names of the contributors
15 | may be used to endorse or promote products derived from this software without
16 | specific prior written permission. 
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 
22 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 
30 | 


--------------------------------------------------------------------------------
/Nano.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | The compiler main module
  3 | --
  4 | 
  5 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  6 | {-# LANGUAGE Strict #-}
  7 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  8 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  9 | 
 10 | module Nano where
 11 | 
 12 | --------------------------------------------------------------------------------
 13 | 
 14 | import Prelude ( Int , Char , Eq , Show )
 15 | import PrimGHC
 16 | 
 17 | --------------------------------------------------------------------------------
 18 | 
 19 | import Base
 20 | import Containers
 21 | import Types
 22 | import PrimOps
 23 | import DataCon
 24 | import Syntax
 25 | import Parser
 26 | import Dependency
 27 | import Core
 28 | import ScopeCheck
 29 | import Inliner
 30 | import Closure
 31 | import CodeGen
 32 | import Eval
 33 | 
 34 | {-% include "Base.hs"       %-}
 35 | {-% include "Containers.hs" %-}
 36 | {-% include "Types.hs"      %-}
 37 | {-% include "PrimOps.hs"    %-}
 38 | {-% include "DataCon.hs"    %-}
 39 | {-% include "Syntax.hs"     %-}
 40 | {-% include "Parser.hs"     %-}
 41 | {-% include "Dependency.hs" %-}
 42 | {-% include "Core.hs"       %-}
 43 | {-% include "ScopeCheck.hs" %-}
 44 | {-% include "Inliner.hs"    %-}
 45 | {-% include "Closure.hs"    %-}
 46 | {-% include "CodeGen.hs"    %-}
 47 | {-% include "Eval.hs"       %-}
 48 | 
 49 | --------------------------------------------------------------------------------
 50 | -- * Compiler entry point 
 51 | 
 52 | -- | GHC \/ nanohs shared entry point
 53 | main = runIO nanoMain 
 54 | 
 55 | -- | Nano entry point
 56 | nanoMain :: IO Unit
 57 | nanoMain = iobind getArgs (\args -> case args of { Nil -> printUsage ; Cons cmd args' -> handleCommand cmd args' }) where
 58 |   { handleCommand cmd args = case cmd of { Cons dash cmd1 -> ifte (cneq dash '-') printUsage (case cmd1 of { Cons c _ -> 
 59 |       ifte (ceq c 'i') (interpret     args) (
 60 |       ifte (ceq c 'c') (compile False args) ( 
 61 |       ifte (ceq c 'o') (compile True  args) printUsage)) })}
 62 |   ; interpret      args = case args of { Cons input rest -> runInterpreter input ; _ -> printUsage } 
 63 |   ; compile   flag args = case args of { Cons input rest -> case rest of { Cons output _ -> runCompiler flag input output ; _ -> printUsage } ; _ -> printUsage }}
 64 | 
 65 | printUsage :: IO Unit
 66 | printUsage = iomapM_ putStrLn 
 67 |   [ "usage:"
 68 |   , "./nanohs -i <input.hs>               # interpret"
 69 |   , "./nanohs -c <input.hs> <output.c>    # compile without optimizations"    
 70 |   , "./nanohs -o <input.hs> <output.c>    # compile with optimizations"     ] 
 71 | 
 72 | runCompiler :: Bool -> FilePath -> FilePath -> IO Unit
 73 | runCompiler optimize inputFn outputFn = iobind (loadModules Compile inputFn) (\prgdata -> case prgdata of { 
 74 |   PrgData strlits dconTrie coreprg -> 
 75 |     iosequence_ 
 76 |       [ putStrLn (append "compiling with optimizations " (ifte optimize "enabled" "disabled"))
 77 |       , let { lprogram = coreProgramToLifted (ifte optimize (optimizeCorePrg coreprg) coreprg)
 78 |             ; code     = runCodeGenM_ (liftedProgramToCode inputFn strlits dconTrie lprogram)
 79 |             } in writeLines outputFn code 
 80 |       , putStrLn "done." ]})
 81 | 
 82 | runInterpreter :: FilePath -> IO Unit
 83 | runInterpreter inputFn = iobind (loadModules Interpret inputFn) (\prgdata -> case prgdata of { 
 84 |   PrgData strlits dconTrie coreprg -> case coreprg of { CorePrg blocks mainIdx _main ->
 85 |     ioseq (putStrLn "interpreting...") (let 
 86 |       { bigterm = termLevelsToIndices 0 (programToTerm blocks)
 87 |       ; dconNames = mapFromList (map swap (trieToList dconTrie))
 88 |       ; staticEnv = StaticEnv dconNames strlits mainIdx
 89 |       } in iobind (eval staticEnv NilEnv bigterm) (printValue dconNames) )}})
 90 |     
 91 | --------------------------------------------------------------------------------
 92 | -- ** Load and parse source files
 93 | 
 94 | data ProgramData = PrgData (List String) DataConTable CoreProgram
 95 | 
 96 | loadModules :: Mode -> FilePath -> IO ProgramData
 97 | loadModules mode inputFn =
 98 |   iobind (loadAndParse1 Nil inputFn) (\pair -> case pair of { Pair files toplevs -> (let 
 99 |       { defins0  = catMaybes (map mbDefin toplevs)
100 |       ; dpair    = extractStringConstants defins0 } in case dpair of { Pair strlits defins1 -> let 
101 |         { dconTrie = collectDataCons (map located defins1)
102 |         ; program  = reorderProgram  defins1
103 |         ; coreprg  = programToCoreProgram mode dconTrie program
104 |         } in ioreturn (PrgData strlits dconTrie coreprg) })}) 
105 | 
106 | type Files  = List FilePath
107 | type Loaded = Pair Files (List TopLevel)
108 | 
109 | loadAndParseMany :: Files -> List FilePath -> IO Loaded
110 | loadAndParseMany sofar fnames = case fnames of { Nil -> ioreturn (Pair sofar Nil) ; Cons this rest ->
111 |   iobind (loadAndParse1    sofar  this) (\loaded -> case loaded of { Pair sofar'  toplevs1 ->
112 |   iobind (loadAndParseMany sofar' rest) (\loaded -> case loaded of { Pair sofar'' toplevs2 ->
113 |   ioreturn (Pair sofar'' (append toplevs1 toplevs2)) }) }) }
114 | 
115 | loadAndParse1 :: Files -> FilePath -> IO Loaded
116 | loadAndParse1 sofar fname = case stringMember fname sofar of
117 |   { True  -> ioreturn (Pair sofar Nil)
118 |   ; False -> ioseq (putStrLn (append "+ " fname)) (iobind (readFile fname) (\text -> let 
119 |       { blocks   = lexer fname text
120 |       ; toplevs  = map (parseTopLevelBlock fname) blocks
121 |       ; includes = filterIncludes toplevs
122 |       ; sofar'   = Cons fname sofar
123 |       } in iobind (loadAndParseMany sofar' includes) (\loaded -> case loaded of { Pair sofar'' toplevs2 -> 
124 |            ioreturn (Pair sofar'' (append toplevs toplevs2)) }))) }
125 | 
126 | --------------------------------------------------------------------------------
127 | 


--------------------------------------------------------------------------------
/Parser.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Parsing
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module Parser where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | 
 18 | import Base
 19 | import Types
 20 | import Syntax
 21 | 
 22 | {-% include "Base.hs"   %-}
 23 | {-% include "Types.hs"  %-}
 24 | {-% include "Syntax.hs" %-}
 25 | 
 26 | --------------------------------------------------------------------------------
 27 | -- * Parsing
 28 | 
 29 | mbVar  :: Token -> Maybe Name
 30 | mbLit  :: Token -> Maybe Literal
 31 | mbSpec :: Token -> Maybe Special
 32 | 
 33 | mbVar  tok = case tok of { VarTok  v -> Just v ; _ -> Nothing }
 34 | mbLit  tok = case tok of { LitTok  l -> Just l ; _ -> Nothing }
 35 | mbSpec tok = case tok of { SpecTok s -> Just s ; _ -> Nothing }
 36 | 
 37 | mbVarL :: Token -> Maybe Name
 38 | mbVarU :: Token -> Maybe Name
 39 | mbVarL tok = case mbVar tok of { Just str -> ifte (isLower_ (head str)) (Just str) Nothing }  
 40 | mbVarU tok = case mbVar tok of { Just str -> ifte (isUpper  (head str)) (Just str) Nothing }  
 41 | 
 42 | mbStrLit  :: Token -> Maybe String
 43 | mbStrLit tok = case mbLit tok of { Nothing -> Nothing ; Just lit -> 
 44 |   case lit of { StrL s -> Just s ; _ -> Nothing } }
 45 | 
 46 | isNotWhite :: Token -> Bool
 47 | isNotWhite tok = case tok of { WhiteTok -> False ; _ -> True }
 48 | 
 49 | locatedL :: Parser tok a -> Parser tok (Located a)
 50 | locatedL parser = pbind getFn     (\fname ->
 51 |                   pbind getSrcPos (\pos1  ->
 52 |                   pbind parser    (\what  ->
 53 |                   pbind getSrcPos (\pos2  -> preturn (Located (Loc fname pos1 pos2) what)))))
 54 | 
 55 | type LList a = List (Located a)
 56 | type LString = LList Char
 57 | 
 58 | addLocations :: FilePath -> String -> LString
 59 | addLocations fname text = go startSrcPos text where { go pos str = case str of { Nil -> Nil
 60 |   ; Cons x xs -> let { pos' = nextSrcPos x pos } in Cons (Located (Loc fname pos pos') x) (go pos' xs) } }
 61 | 
 62 | --------------------------------------------------------------------------------
 63 | -- ** Parser combinators
 64 | 
 65 | data ParseResult tok a
 66 |   = ParseOk a SrcPos (LList tok)
 67 |   | ParseErr  SrcPos
 68 |   deriving Show
 69 | 
 70 | type Parser tok a = FilePath -> SrcPos -> LList tok -> ParseResult tok a
 71 | 
 72 | runParser' :: FilePath -> Parser tok a -> LList tok -> ParseResult tok a
 73 | runParser' fname parser input = case input of { Nil -> error "empty input"
 74 |   ; Cons locx _ -> parser fname (locatedStart locx) input }
 75 | 
 76 | runParser :: FilePath -> Parser tok a -> LList tok -> Either String a
 77 | runParser fname p input = case runParser' fname p input of
 78 |   { ParseOk x pos rest -> case rest of
 79 |     { Nil -> Right x
 80 |     ; Cons _ _ ->   Left (append "unexpected extra token at " (showSrcPos' fname pos)) }
 81 |   ; ParseErr pos -> Left (append "parse error at "            (showSrcPos' fname pos)) }
 82 | 
 83 | runParser_ :: FilePath -> Parser tok a -> LList tok -> a
 84 | runParser_ fname p input = case runParser fname p input of { Right y -> y ; Left msg -> error msg }
 85 | 
 86 | preturn :: a -> Parser tok a
 87 | preturn x fn pos str = ParseOk x pos str
 88 | 
 89 | pfail :: Parser tok a
 90 | pfail fn pos str = ParseErr pos
 91 | 
 92 | pfmap :: (a -> b) -> Parser tok a -> Parser tok b
 93 | pfmap f p = \fn pos str -> case p fn pos str of
 94 |   { ParseOk x pos' str' -> ParseOk (f x) pos' str'
 95 |   ; ParseErr  pos'      -> ParseErr pos' }
 96 | 
 97 | preplace :: b -> Parser tok a -> Parser tok b
 98 | preplace x = pfmap (const x)
 99 | 
100 | pbind :: Parser tok a -> (a -> Parser tok b) -> Parser tok b
101 | pbind p u = \fn pos str -> case p fn pos str of
102 |   { ParseOk x pos' str' -> u x fn pos' str'
103 |   ; ParseErr  pos'      -> ParseErr pos' }
104 | 
105 | pseq  :: Parser tok a -> Parser tok b -> Parser tok b
106 | pseq  p q = pbind p (\_ -> q)
107 | 
108 | ppost :: Parser tok a -> Parser tok b -> Parser tok a
109 | ppost p q = pbind p (\x -> pbind q (\_ -> preturn x))
110 | 
111 | getFn :: Parser tok FilePath
112 | getFn = \fn pos str -> ParseOk fn pos str
113 | 
114 | getSrcPos :: Parser tok SrcPos
115 | getSrcPos = \fn pos str -> ParseOk pos pos str
116 | 
117 | alternative :: Parser tok a -> Parser tok a -> Parser tok a
118 | alternative p q = \fn pos str -> case p fn pos str of
119 |   { ParseOk x pos' str' -> ParseOk x pos' str'
120 |   ; ParseErr _          -> q fn pos str }
121 | 
122 | choice :: List (Parser tok a) -> Parser tok a
123 | choice list = case list of { Nil -> pfail ; Cons p ps -> alternative p (choice ps) }
124 | 
125 | optional :: Parser tok a -> Parser tok (Maybe a)
126 | optional p = \fn pos str -> case p fn pos str of
127 |   { ParseOk x pos' str' -> ParseOk (Just x) pos' str'
128 |   ; ParseErr  _pos'     -> ParseOk Nothing  pos  str  }
129 | 
130 | option :: a -> Parser tok a -> Parser tok a
131 | option x0 p = \fn pos str -> case p fn pos str of
132 |   { ParseOk x pos' str'  -> ParseOk x  pos' str'
133 |   ; ParseErr  _pos'      -> ParseOk x0 pos  str  }
134 | 
135 | many :: Parser tok a -> Parser tok (List a)
136 | many p = pbind (optional p) (\mb -> case mb of
137 |   { Nothing -> preturn Nil ; Just x -> pbind (many p) (\xs -> preturn (Cons x xs)) })
138 | 
139 | many1 :: Parser tok a -> Parser tok (List a)
140 | many1 p = pbind p        (\x ->
141 |           pbind (many p) (\xs -> preturn (Cons x xs)))
142 | 
143 | manyTill :: Parser tok end -> Parser tok a -> Parser tok (List a)
144 | manyTill end p = go Fake where { go Fake = alternative (preplace Nil end)
145 |   (pbind p (\x -> pbind (go Fake) (\xs -> preturn (Cons x xs)))) }
146 | 
147 | sepEndBy :: Parser tok sep -> Parser tok end -> Parser tok a -> Parser tok (List a)
148 | sepEndBy sep end p = alternative (preplace Nil end) (sepEndBy1 sep end p)
149 | 
150 | sepEndBy1 :: Parser tok sep -> Parser tok end -> Parser tok a -> Parser tok (List a)
151 | sepEndBy1 sep end p = pbind p (\x -> alternative
152 |   (preplace (Cons x Nil) end)
153 |   (pseq sep (pbind (sepEndBy sep end p) (\xs -> preturn (Cons x xs)))))
154 | 
155 | accept :: (tok -> Maybe a) -> Parser tok a
156 | accept f fn pos toks = case toks of
157 |   { Nil -> ParseErr pos
158 |   ; Cons locx xs -> case locx of { Located loc x -> case f x of
159 |     { Just y    -> ParseOk y (locEnd loc) xs
160 |     ; Nothing   -> ParseErr pos } } }
161 | 
162 | satisfy :: (tok -> Bool) -> Parser tok tok
163 | satisfy f fn pos toks = case toks of
164 |   { Nil -> ParseErr pos
165 |   ; Cons locx xs -> case locx of { Located loc x -> case f x of
166 |     { True  -> ParseOk x (locEnd loc) xs
167 |     ; False -> ParseErr pos } } }
168 | 
169 | anyToken :: Parser tok tok
170 | anyToken = satisfy (\_ -> True) 
171 | 
172 | token :: Eq tok => tok -> Parser tok tok
173 | token t = satisfy (geq t) 
174 | 
175 | tokens :: Eq tok => List tok -> Parser tok (List tok)
176 | tokens toks = case toks of { Nil -> preturn Nil ; Cons t ts ->
177 |   pbind (token t) (\x -> pbind (tokens ts) (\xs -> preturn (Cons x xs))) }
178 | 
179 | charToken :: Char -> Parser Char Char
180 | charToken t = satisfy (ceq t) 
181 | 
182 | charTokens :: String -> Parser Char String
183 | charTokens toks = case toks of { Nil -> preturn Nil ; Cons t ts ->
184 |   pbind (charToken t) (\x -> pbind (charTokens ts) (\xs -> preturn (Cons x xs))) }
185 | 
186 | oneOf :: Eq tok => List tok -> Parser tok tok
187 | oneOf list = satisfy (\x -> elem x list)
188 | 
189 | noneOf :: Eq tok => List tok -> Parser tok tok
190 | noneOf list = satisfy (\x -> not (elem x list))
191 | 
192 | charOneOf :: List Char -> Parser Char Char
193 | charOneOf list = satisfy (\x -> charElem x list)
194 | 
195 | charNoneOf :: List Char -> Parser Char Char
196 | charNoneOf list = satisfy (\x -> not (charElem x list))
197 | 
198 | eof :: Parser tok Unit
199 | eof fn pos str = case str of { Nil -> ParseOk Unit pos str ; Cons _ _ -> ParseErr pos }
200 | 
201 | --------------------------------------------------------------------------------
202 | -- * Parsing the pseudo-Haskell syntax
203 | -- ** Lexer
204 | 
205 | type Lexer a = Parser Char a
206 | 
207 | digitL    = satisfy isDigit
208 | lowerL    = satisfy isLower
209 | upperL    = satisfy isUpper
210 | alphaL    = satisfy isAlpha
211 | _alphaL   = satisfy (\ch -> or (ceq ch '_') (isAlpha ch))
212 | alphaNumL = satisfy isAlphaNum
213 | 
214 | spaces0 :: Lexer Int
215 | spaces0 = pfmap length (many (charToken ' '))
216 | 
217 | spaces1 :: Lexer Int
218 | spaces1 = pfmap length (many1 (charToken ' '))
219 | 
220 | digitsL :: Lexer String
221 | digitsL = many1 digitL
222 | 
223 | natL :: Lexer Int
224 | natL = pbind digitsL (\xs -> case readNat xs of { Just n -> preturn n ; Nothing -> pfail })
225 | 
226 | intL :: Lexer Int
227 | intL = pbind (optional (charToken '-'))
228 |   (\mb -> case mb of { Nothing -> natL ; Just _ -> pfmap negate natL })
229 | 
230 | charLiteralL :: Lexer Char
231 | charLiteralL =
232 |   pbind (charToken singleQuoteC) (\_ ->
233 |   pbind (anyToken              ) (\c ->
234 |   pbind (charToken singleQuoteC) (\_ -> preturn c)))
235 | 
236 | stringLiteralL :: Lexer String
237 | stringLiteralL =
238 |   pbind (charToken doubleQuoteC)                 (\_  ->
239 |   pbind (many (satisfy (cneq doubleQuoteC)))     (\xs ->
240 |   pbind (charToken doubleQuoteC)                 (\_  -> preturn xs)))
241 | 
242 | identifierL :: Lexer Name
243 | identifierL = pbind _alphaL                                          (\x  ->
244 |               pbind (many (alternative alphaNumL (charOneOf "_'#"))) (\xs ->
245 |               preturn (Cons x xs)))
246 | 
247 | literalL :: Lexer Literal
248 | literalL = choice
249 |   [ pfmap IntL intL
250 |   , pfmap ChrL charLiteralL
251 |   , pfmap StrL stringLiteralL ]
252 | 
253 | specialL :: Lexer Special
254 | specialL = choice
255 |   [ preplace LParen    (charToken  '(' )
256 |   , preplace RParen    (charToken  ')' )
257 |   , preplace LBrace    (charToken  '{' )
258 |   , preplace RBrace    (charToken  '}' )
259 |   , preplace LBracket  (charToken  '[' )
260 |   , preplace RBracket  (charToken  ']' )
261 |   , preplace Arrow     (charTokens "->")
262 |   , preplace DArrow    (charTokens "=>")
263 |   , preplace HasType   (charTokens "::")
264 |   , preplace Comma     (charToken  ',' )
265 |   , preplace Semicolon (charToken  ';' )
266 |   , preplace EqualSign (charToken  '=' )
267 |   , preplace Pipe      (charToken  '|' )
268 |   , preplace Lambda    (charToken  backslashC)
269 |   , preplace Dot       (charToken  '.' )
270 |   ]
271 | 
272 | lexemeL :: Lexer Token
273 | lexemeL = choice
274 |   [ pfmap    LitTok   literalL
275 |   , pfmap    SpecTok  specialL
276 |   , pfmap    VarTok   identifierL
277 |   , preplace WhiteTok spaces1
278 |   ]
279 | 
280 | -- | 0x0A, or 0x0D, or 0x0D,0x0A.
281 | eol_ :: Lexer Unit
282 | eol_ = alternative linux windows where
283 |   { linux   = preplace Unit (charToken newlineC)
284 |   ; windows = preplace Unit (pseq (charToken carriageReturnC) (optional (charToken newlineC))) }
285 | 
286 | eol :: Lexer Unit
287 | eol = alternative eol_ eof
288 | 
289 | eolIndent :: Lexer Int
290 | eolIndent = alternative (pseq eol_ spaces0) (preplace 0 eof)
291 | 
292 | -- | with EOL
293 | commentL :: Lexer String
294 | commentL = ppost commentL' eol
295 | 
296 | -- | without EOL
297 | commentL' :: Lexer String
298 | commentL' = choice [ comment1 , comment2 , comment3 ] where
299 |   { comment1 = pseq (charTokens "--" ) (many (charNoneOf [newlineC,carriageReturnC]))
300 |   ; comment2 = pseq (charTokens "{-#") (many (charNoneOf [newlineC,carriageReturnC]))
301 |   ; comment3 = pseq (charTokens "#"  ) (many (charNoneOf [newlineC,carriageReturnC]))
302 |   }
303 | 
304 | -- | We need to hide some stuff (for example @include@-s) from GHC
305 | nanoPragmaL :: Lexer LexBlock
306 | nanoPragmaL = 
307 |   pbind (charTokens "{-%")                                         (\_  -> 
308 |   pbind (many1 (locatedL lexemeL))                                 (\ln -> 
309 |   pbind (charTokens "%-}")                                         (\_  ->
310 |   pbind (ppost (many (charNoneOf [newlineC,carriageReturnC])) eol) (\_  -> preturn ln))))
311 | 
312 | -- | Note: this accepts "eof"!
313 | emptyLineL :: Lexer Unit
314 | emptyLineL = pseq spaces0 eol
315 | 
316 | type LexBlock = List LToken
317 | 
318 | -- | Parser a line and all following indented lines
319 | blockL :: Lexer LexBlock
320 | blockL = worker1 Fake where
321 |   { line      = alternative comment (many1 (locatedL lexemeL))
322 |   ; comment   = pseq commentL' (preturn Nil)
323 |   ; worker  _ = pbind eolIndent (\k -> ifte (gt k 0) (option Nil (worker1 Fake)) (preturn Nil))
324 |   ; worker1 _ = pbind line      (\ls1 -> pbind (worker Fake) (\ls2 -> preturn (append ls1 ls2)))
325 |   }
326 | 
327 | blockOrCommentL :: Lexer (Maybe LexBlock)
328 | blockOrCommentL = choice
329 |   [ preplace Nothing commentL
330 |   , preplace Nothing emptyLineL
331 |   , pfmap    Just    nanoPragmaL
332 |   , pfmap    Just    blockL
333 |   ]
334 | 
335 | programL :: Lexer (List LexBlock)
336 | programL = pfmap catMaybes (manyTill eof blockOrCommentL)
337 | 
338 | lexer :: FilePath -> String -> List LexBlock
339 | lexer fname input = runParser_ fname programL (addLocations fname input)
340 | 
341 | --------------------------------------------------------------------------------
342 | -- ** The parser
343 | 
344 | type Parse a = Parser Token a
345 | 
346 | parseTopLevelBlock :: FilePath -> LexBlock -> TopLevel
347 | parseTopLevelBlock fname tokens = runParser_ fname topLevelP (filterWhite tokens)
348 | 
349 | filterWhite :: LexBlock -> LexBlock
350 | filterWhite = filter cond where { cond ltok = isNotWhite (located ltok) }
351 | 
352 | keywords :: List String
353 | keywords = [ "where" , "case" , "of" , "let" , "let_" , "in" , "module" , "import" , "include" , "data" , "type" ]
354 | 
355 | isKeyword :: String -> Bool
356 | isKeyword str = elem str keywords
357 | 
358 | topLevelP :: Parse TopLevel
359 | topLevelP = choice
360 |   [ tyAliasP
361 |   , dataDeclP
362 |   , importP
363 |   , includeP
364 |   , moduleP
365 |   , tyDeclP
366 |   , pfmap TopDefin ldefinP
367 |   ]
368 | 
369 | tyDeclP :: Parse TopLevel
370 | tyDeclP = pbind  varP            (\name ->
371 |           pbind (specP HasType ) (\_    ->
372 |           pbind (many1 anyToken) (\toks -> preturn (TopTyDecl name toks) )))
373 | 
374 | dataDeclP :: Parse TopLevel
375 | dataDeclP = pbind (keywordP "data") (\_ ->
376 |             pbind (many1 anyToken ) (\toks -> preturn (TopDataDecl toks) ))
377 | 
378 | tyAliasP :: Parse TopLevel
379 | tyAliasP = pbind (keywordP "type") (\_ ->
380 |            pbind (many1 anyToken ) (\toks -> preturn (TopTyAlias toks) ))
381 | 
382 | -- haskell import
383 | importP :: Parse TopLevel
384 | importP = pbind (keywordP "import") (\_ ->
385 |           pbind (many1 anyToken   ) (\toks -> preturn (TopImport toks) ))
386 | 
387 | -- our C-style include
388 | includeP :: Parse TopLevel
389 | includeP = pbind (keywordP "include") (\_ ->
390 |            pbind (accept mbStrLit   ) (\fname -> preturn (TopInclude fname) ))
391 | 
392 | moduleP :: Parse TopLevel
393 | moduleP = pbind (keywordP "module") (\_ ->
394 |           pbind (many1 anyToken   ) (\toks -> preturn (TopModule toks) ))
395 | 
396 | definP :: Parse DefinE
397 | definP f p t = 
398 |   pbind varP              (\name ->
399 |   pbind (many varP)       (\args ->
400 |   pbind (specP EqualSign) (\_    ->
401 |   pbind exprP             (\body -> preturn (Defin name (lamsE args body)) ))))  f p t 
402 | 
403 | ldefinP :: Parse LDefinE
404 | ldefinP f p t = locatedL definP f p t
405 | 
406 | exprP :: Parse Expr
407 | exprP f p t = 
408 |   pbind nakedExprP             (\expr ->
409 |   pbind (optional whereBlockP) (\mb   ->
410 |   preturn (case mb of { Nothing -> expr ; Just defs -> RecE defs expr } )))  f p t 
411 | 
412 | whereBlockP :: Parse (List LDefinE)
413 | whereBlockP f p t = pbind (keywordP "where") (\_ -> blockP) f p t 
414 | 
415 | -- | Here \"naked\" means without a where block
416 | nakedExprP :: Parse Expr
417 | nakedExprP f p t = choice
418 |   [ lamExprP
419 |   , pfmap listAppsE (many1 atomP)
420 |   ]  f p t 
421 | 
422 | -- | We need an explicit eta-expansion here so that it doesn't loop in GHCi
423 | -- (and probably also itself)
424 | atomP :: Parse Expr
425 | atomP f p t = choice
426 |   [ inParensP exprP
427 |   , pfmap LitE  literalP
428 |   , pfmap ListE listExprP
429 |   , caseExprP
430 |   , letsExprP , letRecExprP
431 |   , pfmap VarE (locatedL varP)
432 |   ] f p t 
433 | 
434 | specP :: Special -> Parse Special
435 | specP spec = preplace spec (token (SpecTok spec))
436 | 
437 | literalP :: Parse Literal
438 | literalP = accept mbLit
439 | 
440 | -- capitalIdentP :: Parse Name
441 | -- capitalIdentP = accept mbVarU
442 | 
443 | identP :: Parse Name
444 | identP = accept mbVar
445 | 
446 | -- | This does not accepts they keywords, but accepts constructors
447 | varP :: Parse Name
448 | varP = pbind identP (\v -> ifte (isKeyword v) pfail (preturn v))
449 | 
450 | -- | This only accept uppercase identifiers
451 | conP :: Parse Name
452 | conP = accept mbVarU
453 | 
454 | keywordP :: String -> Parse String
455 | keywordP word = pbind identP (\v -> ifte (stringEq v word) (preturn word) pfail)
456 | 
457 | inParensP :: Parse a -> Parse a
458 | inParensP p = pbind (specP LParen) (\_ ->
459 |               pbind p              (\x ->
460 |               pbind (specP RParen) (\_ -> preturn x)))
461 | 
462 | listExprP  :: Parse (List Expr   )
463 | tupleExprP :: Parse (List Expr   )
464 | blockP     :: Parse (List LDefinE)
465 | branchesP  :: Parse (List BranchE)
466 | 
467 | listExprP  f p t = pbind (specP LBracket) (\_ -> sepEndBy (specP Comma    ) (specP RBracket) exprP  ) f p t
468 | tupleExprP f p t = pbind (specP LParen  ) (\_ -> sepEndBy (specP Comma    ) (specP RParen  ) exprP  ) f p t
469 | blockP     f p t = pbind (specP LBrace  ) (\_ -> sepEndBy (specP Semicolon) (specP RBrace  ) ldefinP) f p t
470 | branchesP  f p t = pbind (specP LBrace  ) (\_ -> sepEndBy (specP Semicolon) (specP RBrace  ) branchP) f p t
471 | 
472 | patternP :: Parse Pattern
473 | patternP f p t = naked f p t where
474 |   { naked f p t = choice [ wild , var , inParensP patternP , apps ] f p t
475 |   ; safe  f p t = choice [ wild , var , inParensP patternP , con  ] f p t
476 |   ; wild  = pbind (keywordP "_" ) (\_    -> preturn  WildP  )
477 |   ; var   = pbind (accept mbVarL) (\v    -> preturn (VarP v))
478 |   ; con   = pbind  conP           (\con  -> preturn (AppP con Nil))
479 |   ; apps  = pbind  conP           (\con  ->
480 |             pbind (many safe    ) (\args -> preturn (AppP con args)))
481 |   }  
482 | 
483 | branchP :: Parse BranchE
484 | branchP f p t = alternative defaultBranchP branchP' f p t
485 | 
486 | branchP' :: Parse BranchE
487 | branchP' f p t = 
488 |   pbind conP          (\con  ->
489 |   pbind (many varP  ) (\args ->
490 |   pbind (specP Arrow) (\_    ->
491 |   pbind (exprP      ) (\body -> preturn (BranchE con args body))))) f p t 
492 | 
493 | defaultBranchP :: Parse BranchE
494 | defaultBranchP f p t 
495 |   = pbind (keywordP "_") (\_    ->
496 |     pbind (specP Arrow ) (\_    ->
497 |     pbind (exprP       ) (\body -> preturn (DefaultE body)))) f p t 
498 | 
499 | lamExprP :: Parse Expr
500 | lamExprP f p t  =
501 |   pbind (specP Lambda) (\_    ->
502 |   pbind (many1 varP  ) (\args ->
503 |   pbind (specP Arrow ) (\_    ->
504 |   pbind exprP          (\body -> preturn (lamsE args body))))) f p t 
505 | 
506 | letExprP' :: (List LDefinE -> Expr -> Expr) -> String -> Parse Expr
507 | letExprP' con letkw = pbind (keywordP letkw) (\_    ->
508 |                   pbind (blockP        ) (\defs ->
509 |                   pbind (keywordP "in" ) (\_    ->
510 |                   pbind (exprP         ) (\expr -> preturn (con defs expr)))))
511 | 
512 | -- | Non-recursive let
513 | letsExprP :: Parse Expr
514 | letsExprP f p t = letExprP' LetE "let_" f p t 
515 | 
516 | -- | Recursive let
517 | letRecExprP :: Parse Expr
518 | letRecExprP f p t = letExprP' RecE "let" f p t 
519 | 
520 | caseExprP :: Parse Expr
521 | caseExprP f p t  = 
522 |   pbind (keywordP "case") (\_     ->
523 |   pbind (locatedL exprP ) (\lexpr ->
524 |   pbind (keywordP "of"  ) (\_     ->
525 |   pbind (branchesP      ) (\brs   -> preturn (CaseE lexpr brs))))) f p t 
526 | 
527 | --------------------------------------------------------------------------------
528 | 


--------------------------------------------------------------------------------
/PrimGHC.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Primops implemented in Haskell, so the compiler can be hosted by GHC too
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict, StrictData #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances, TypeFamilies #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module PrimGHC where
 10 | 
 11 | import qualified Prelude
 12 | import qualified Data.Char
 13 | import qualified Data.Bits          as Bits
 14 | import qualified System.IO          as IO
 15 | import qualified System.IO.Unsafe   as Unsafe
 16 | import qualified System.Environment as Env
 17 | import qualified System.Exit        as Exit
 18 | import qualified Control.Exception  as Exc
 19 | 
 20 | import Prelude     ( Int , Char , Eq , Show )
 21 | import Data.String ( IsString(..) )
 22 | import GHC.Exts    ( IsList  (..) )
 23 | 
 24 | --------------------------------------------------------------------------------
 25 | 
 26 | import qualified Debug.Trace 
 27 | 
 28 | debug :: Show a => List Char -> a -> b -> b 
 29 | debug x y z = Debug.Trace.trace msg z where
 30 |   msg   = Prelude.concat parts
 31 |   parts :: [Prelude.String]
 32 |   parts = [ ">>> " , _toGhcString x , " => " , Prelude.show y ]
 33 | 
 34 | debug_ :: Show b => List Char -> b -> b 
 35 | debug_ x y = debug x y y
 36 | 
 37 | --------------------------------------------------------------------------------
 38 | -- * Primitive types used by the primops
 39 | 
 40 | data Unit = Unit deriving Show
 41 | 
 42 | data Bool = False | True deriving Show
 43 | 
 44 | data List a = Nil | Cons a (List a) deriving (Eq) 
 45 | 
 46 | type String = List Char
 47 | 
 48 | data Maybe a = Nothing | Just a deriving Show
 49 | 
 50 | type Handle = IO.Handle
 51 | 
 52 | data Pair a b = Pair a b deriving Show
 53 | 
 54 | --------------------------------------------------------------------------------
 55 | -- * IO support
 56 | 
 57 | type IO a = IO.IO a
 58 | 
 59 | ioreturn :: a -> IO a
 60 | ioreturn = Prelude.return
 61 | 
 62 | iobind :: IO a -> (a -> IO b) -> IO b
 63 | iobind = (Prelude.>>=)
 64 | 
 65 | {-# NOINLINE runIO #-}
 66 | runIO :: IO a -> IO.IO a
 67 | runIO action = do
 68 |   Prelude.putStrLn "[rts version = GHC]"
 69 |   action
 70 | 
 71 | --------------------------------------------------------------------------------
 72 | -- * Built-ins \/ primops
 73 | 
 74 | negate :: Int -> Int
 75 | negate = Prelude.negate
 76 | 
 77 | plus :: Int -> Int -> Int
 78 | plus = (Prelude.+)
 79 | 
 80 | minus :: Int -> Int -> Int
 81 | minus = (Prelude.-)
 82 | 
 83 | times :: Int -> Int -> Int
 84 | times = (Prelude.*)
 85 | 
 86 | div :: Int -> Int -> Int
 87 | div = Prelude.div
 88 | 
 89 | mod :: Int -> Int -> Int
 90 | mod = Prelude.mod
 91 | 
 92 | bitAnd :: Int -> Int -> Int
 93 | bitAnd = (Bits..&.)
 94 | 
 95 | bitOr :: Int -> Int -> Int
 96 | bitOr = (Bits..|.)
 97 | 
 98 | bitXor :: Int -> Int -> Int
 99 | bitXor = Bits.xor
100 | 
101 | shiftL :: Int -> Int -> Int
102 | shiftL = Bits.shiftL
103 | 
104 | shiftR :: Int -> Int -> Int
105 | shiftR = Bits.shiftR
106 | 
107 | chr :: Int -> Char
108 | chr = Data.Char.chr
109 | 
110 | ord :: Char -> Int
111 | ord = Data.Char.ord
112 | 
113 | -- | If-then-else *must* be lazy. Hence for now it is a primop
114 | ifte :: Bool -> a -> a -> a
115 | ifte b ~x ~y = case b of { True -> x ; False -> y }
116 | 
117 | -- | It is useful if @and@ \/ @or@ shortcircuits, hence they are primops
118 | and :: Bool -> Bool -> Bool
119 | and x ~y = case x of { True -> y ; False -> False }
120 | 
121 | or :: Bool -> Bool -> Bool
122 | or x ~y = case x of { True -> True ; False -> y }
123 | 
124 | -- | @not@ could be user-defined, but @and@, @or@ are already primops, and it's faster this way
125 | not :: Bool -> Bool
126 | not x = case x of { True -> False ; False -> True }
127 | 
128 | geq :: Eq a => a -> a -> Bool
129 | geq x y = _fromGhcBool ((Prelude.==) x y)
130 | 
131 | eq :: Int -> Int -> Bool
132 | eq x y = _fromGhcBool ((Prelude.==) x y)
133 | 
134 | lt :: Int -> Int -> Bool
135 | lt x y = _fromGhcBool ((Prelude.<)  x y)
136 | 
137 | le :: Int -> Int -> Bool
138 | le x y = _fromGhcBool ((Prelude.<=) x y)
139 | 
140 | --------------------------------------------------------------------------------
141 | 
142 | {-# NOINLINE error #-}
143 | error :: String -> a
144 | error msg = Prelude.error (_toGhcString msg)
145 | 
146 | print :: Show a => a -> IO Unit
147 | print what = (Prelude.>>) (Prelude.print what) (Prelude.return Unit)
148 | 
149 | --------------------------------------------------------------------------------
150 | -- * IO monad
151 | 
152 | getChar :: IO (Maybe Char)
153 | getChar = Exc.catch just handler where
154 |   just :: IO.IO (Maybe Char)
155 |   just =  (Prelude.>>=) Prelude.getChar (\c -> Prelude.return (Just c))
156 |   handler :: Exc.IOException -> IO.IO (Maybe Char)
157 |   handler _ = Prelude.return Nothing
158 | 
159 | putChar :: Char -> IO Unit
160 | putChar c = (Prelude.>>) (Prelude.putChar c) (Prelude.return Unit)
161 | 
162 | exit :: Int -> IO Unit
163 | exit 0 = Exit.exitWith  Exit.ExitSuccess   
164 | exit k = Exit.exitWith (Exit.ExitFailure k)
165 | 
166 | getArg :: Int -> IO (Maybe String)
167 | getArg m = Prelude.fmap (index m) (Env.getArgs) where
168 |   index _ [] = Nothing
169 |   index k (this:rest) = case k of 
170 |     0 -> Just (_fromGhcString this) 
171 |     _ -> index ((Prelude.-) k 1) rest 
172 | 
173 | ----------------------------------------
174 | 
175 | stdin :: Handle
176 | stdin = IO.stdin
177 | 
178 | stdout :: Handle
179 | stdout = IO.stdout
180 | 
181 | stderr :: Handle
182 | stderr = IO.stderr
183 | 
184 | data IOMode
185 |   = ReadMode   
186 |   | WriteMode  
187 |   | AppendMode   
188 |   | ReadWriteMode 
189 |   deriving Show
190 | 
191 | _toGhcIOMode :: IOMode -> IO.IOMode
192 | _toGhcIOMode mode = case mode of
193 |   ReadMode      -> IO.ReadMode       
194 |   WriteMode     -> IO.WriteMode     
195 |   AppendMode    -> IO.AppendMode    
196 |   ReadWriteMode -> IO.ReadWriteMode 
197 | 
198 | openFile :: String -> IOMode -> IO Handle
199 | openFile fn mode = IO.openFile (_toGhcString fn) (_toGhcIOMode mode)
200 | 
201 | hClose :: Handle -> IO Unit
202 | hClose h = (Prelude.>>) (IO.hClose h) (Prelude.return Unit) 
203 | 
204 | hGetChar :: Handle -> IO (Maybe Char)
205 | hGetChar h = Exc.catch just handler where
206 |   just :: IO.IO (Maybe Char)
207 |   just =  (Prelude.>>=) (IO.hGetChar h) (\c -> Prelude.return (Just c))
208 |   handler :: Exc.IOException -> IO.IO (Maybe Char)
209 |   handler _ = Prelude.return Nothing
210 | 
211 | hPutChar :: Handle -> Char -> IO Unit
212 | hPutChar h c = (Prelude.>>) (IO.hPutChar h c) (Prelude.return Unit) 
213 | 
214 | hPutStr :: Handle -> String -> IO Unit
215 | hPutStr h s = (Prelude.>>) (IO.hPutStr h (_toGhcString s)) (Prelude.return Unit) 
216 | 
217 | --------------------------------------------------------------------------------
218 | -- * Marshalling to\/from standard Haskell types
219 | 
220 | _fromGhcBool :: Prelude.Bool -> Bool
221 | _fromGhcBool b = case b of { Prelude.True -> True ; Prelude.False -> False }
222 | 
223 | _fromGhcList :: [a] -> List a
224 | _fromGhcList = go where { go [] = Nil ; go (x:xs) = Cons x (go xs) }
225 | 
226 | _toGhcList :: List a -> [a]
227 | _toGhcList = go where { go Nil = [] ; go (Cons x xs) =  x : (go xs) }
228 | 
229 | _fromGhcString :: Prelude.String -> String
230 | _fromGhcString = _fromGhcList
231 | 
232 | _toGhcString :: String -> Prelude.String
233 | _toGhcString = _toGhcList
234 | 
235 | show :: Show a => a -> String
236 | show x = _fromGhcString (Prelude.show x)
237 | 
238 | instance Show a => Show (List a) where show list = Prelude.show (_toGhcList list)
239 | 
240 | instance IsString (List Char) where fromString = _fromGhcString
241 | 
242 | instance IsList (List a) where
243 |   type (Item (List a)) = a
244 |   fromList = _fromGhcList
245 |   toList   = _toGhcList
246 | 
247 | --------------------------------------------------------------------------------
248 | 


--------------------------------------------------------------------------------
/PrimOps.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  3 | {-# LANGUAGE Strict #-}
  4 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  5 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  6 | 
  7 | module PrimOps where
  8 | 
  9 | --------------------------------------------------------------------------------
 10 | 
 11 | import Prelude ( Int , Char , Eq , Show )
 12 | import PrimGHC
 13 | 
 14 | --------------------------------------------------------------------------------
 15 | 
 16 | import Base
 17 | import Containers
 18 | import Types
 19 | 
 20 | {-% include "Base.hs"       %-}
 21 | {-% include "Containers.hs" %-}
 22 | {-% include "Types.hs"      %-}
 23 | 
 24 | --------------------------------------------------------------------------------
 25 | -- ** Primops
 26 | 
 27 | data Prim
 28 |   = Negate | Plus | Minus | Times | Div | Mod | Chr | Ord 
 29 |   | BitAnd | BitOr | BitXor | ShiftL | ShiftR
 30 |   | IFTE | Not | And | Or | GenEQ | IntEQ | IntLT | IntLE
 31 |   | GetChar | PutChar | GetArg | Exit | Error | Print | RunIO | IOBind | IOReturn 
 32 |   | OpenFile | HClose | HGetChar | HPutChar | HPutStr | StdIn | StdOut | StdErr
 33 |   deriving (Eq,Show)
 34 | 
 35 | isLazyPrim :: Prim -> Bool
 36 | isLazyPrim prim = case prim of
 37 |   { IFTE -> True
 38 |   ; And  -> True
 39 |   ; Or   -> True 
 40 |   ; _    -> False }
 41 | 
 42 | showPrim :: Prim -> String
 43 | showPrim prim = case prim of
 44 |   { Negate   -> "Negate"   ; Plus     -> "Plus"     ; Minus   -> "Minus"
 45 |   ; Times    -> "Times"    ; Div      -> "Div"      ; Mod     -> "Mod"
 46 |   ; BitAnd   -> "BitAnd"   ; BitOr    -> "BitOr"    ; BitXor  -> "BitXor"
 47 |   ; ShiftL   -> "ShiftL"   ; ShiftR   -> "ShiftR"    
 48 |   ; Chr      -> "Chr"      ; Ord      -> "Ord"      ; IFTE    -> "IFTE"
 49 |   ; Not      -> "Not"      ; And      -> "And"      ; Or      -> "Or"
 50 |   ; IntEQ    -> "IntEQ"    ; IntLT    -> "IntLT"    ; IntLE   -> "IntLE"
 51 |   ; GenEQ    -> "GenEQ"    ; Error    -> "Error"    ; Exit    -> "Exit" 
 52 |   ; GetChar  -> "GetChar"  ; PutChar  -> "PutChar"  ; GetArg  -> "GetArg" 
 53 |   ; StdIn    -> "StdIn"    ; StdOut   -> "StdOut"   ; StdErr  -> "StdErr" 
 54 |   ; HGetChar -> "HGetChar" ; HPutChar -> "HPutChar" ; HClose  -> "HClose" 
 55 |   ; OpenFile -> "OpenFile" ; HPutStr  -> "HPutStr"  ; Print   -> "Print" 
 56 |   ; IOBind   -> "IOBind"   ; IOReturn -> "IOReturn" ; RunIO   -> "RunIO"    }
 57 | 
 58 | data PrimOp = PrimOp Arity Prim deriving Show
 59 | 
 60 | thePrimOps :: Mode -> Trie PrimOp
 61 | thePrimOps mode = let { io m = case mode of { Compile -> inc m ; Interpret -> id m } } in trieFromList
 62 |   [ Pair "error"     (PrimOp  1      Error   )
 63 |   , Pair "negate"    (PrimOp  1      Negate  )
 64 |   , Pair "plus"      (PrimOp  2      Plus    )
 65 |   , Pair "minus"     (PrimOp  2      Minus   )
 66 |   , Pair "times"     (PrimOp  2      Times   )
 67 |   , Pair "div"       (PrimOp  2      Div     )
 68 |   , Pair "mod"       (PrimOp  2      Mod     )
 69 |   , Pair "bitAnd"    (PrimOp  2      BitAnd  ) 
 70 |   , Pair "bitOr"     (PrimOp  2      BitOr   )
 71 |   , Pair "bitXor"    (PrimOp  2      BitXor  ) 
 72 |   , Pair "shiftL"    (PrimOp  2      ShiftL  )
 73 |   , Pair "shiftR"    (PrimOp  2      ShiftR  )
 74 |   , Pair "chr"       (PrimOp  1      Chr     )
 75 |   , Pair "ord"       (PrimOp  1      Ord     )
 76 |   , Pair "ifte"      (PrimOp  3      IFTE    )
 77 |   , Pair "not"       (PrimOp  1      Not     )
 78 |   , Pair "and"       (PrimOp  2      And     )
 79 |   , Pair "or"        (PrimOp  2      Or      )
 80 |   , Pair "geq"       (PrimOp  2      GenEQ   )
 81 |   , Pair "eq"        (PrimOp  2      IntEQ   )
 82 |   , Pair "lt"        (PrimOp  2      IntLT   )
 83 |   , Pair "le"        (PrimOp  2      IntLE   )
 84 |   , Pair "getChar"   (PrimOp (io 0)  GetChar )
 85 |   , Pair "putChar"   (PrimOp (io 1)  PutChar )
 86 |   , Pair "getArg"    (PrimOp (io 1)  GetArg  )
 87 |   , Pair "exit"      (PrimOp (io 1)  Exit    )
 88 |   , Pair "openFile"  (PrimOp (io 2)  OpenFile)
 89 |   , Pair "hClose"    (PrimOp (io 1)  HClose  )
 90 |   , Pair "hGetChar"  (PrimOp (io 1)  HGetChar)
 91 |   , Pair "hPutChar"  (PrimOp (io 2)  HPutChar)
 92 |   , Pair "hPutStr"   (PrimOp (io 2)  HPutStr )
 93 |   , Pair "print"     (PrimOp (io 1)  Print   )
 94 |   , Pair "iobind"    (PrimOp (io 2)  IOBind  )
 95 |   , Pair "ioreturn"  (PrimOp (io 1)  IOReturn)
 96 |   , Pair "stdin"     (PrimOp  0      StdIn   )
 97 |   , Pair "stdout"    (PrimOp  0      StdOut  )
 98 |   , Pair "stderr"    (PrimOp  0      StdErr  )
 99 |   , Pair "runIO"     (PrimOp  1      RunIO   )
100 |   ]
101 | 
102 | --------------------------------------------------------------------------------
103 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | NanoHaskell: a self-hosting lambda calculus compiler
  3 | ====================================================
  4 | 
  5 | The goal of this experiment is to create a self-hosting lambda calculus
  6 | compiler (and interpreter) in a minimal amount of Haskell-style code.
  7 | 
  8 | The language is (strict) lambda calculus + data constructors + simple
  9 | pattern matching + recursive lets + IO effects. The syntax is chosen so that 
 10 | a program can be also a valid Haskell program at the same time (this makes 
 11 | development much easier).
 12 | 
 13 | Haskell features like type signatures, data type declarations and imports
 14 | are parsed (well, recognized...), but then ignored.
 15 | 
 16 | 
 17 | Current status
 18 | --------------
 19 | 
 20 | * it compiles via GHC, both with and without optimizations
 21 | * it self-hosts, both with and without optimizations
 22 | * it needs a large C stack (32+ Mb) + GCC optims (because of the lack of tail call elimination)
 23 | * source code: about 2000 "essential" lines + 560 lines of type annotations; the C runtime is \~650 lines
 24 |   (including some debugging features)
 25 | * the interpreter is not working 100% correctly at the moment
 26 | 
 27 | 
 28 | Usage
 29 | -----
 30 | 
 31 |     $ nanohs -c input.hs output.c            # compile with optimizations disabled
 32 |     $ nanhos -o input.hs output.c            # compile with optimizations enabled
 33 |     $ nanhos -i input.hs [arg1 [arg2 ...]]   # interpret
 34 | 
 35 | Or you can just use `runghc`:
 36 | 
 37 |     $ runghc Nano.hs -c examples/church.nano tmp.c ; gcc tmp.c ; ./a.out
 38 | 
 39 | 
 40 | ### Imports
 41 | 
 42 | Haskell imports are ignored, but you can use C-style includes with the pragma:
 43 | 
 44 |     {-% include "othermodule.hs" %-}
 45 | 
 46 | 
 47 | The surface language
 48 | --------------------
 49 | 
 50 | The idea is to use a subset of Haskell syntax, so that the same
 51 | program can also be compiled / interpreted by GHC.
 52 | 
 53 | * no static type system (untyped lambda calculus) - but maybe there should be a type checker after all?
 54 | * no data type declarations (constructors are arbitrary capitalized names)
 55 | * no module system - instead, C-style includes
 56 | * strict language (if-then-else must be lazy though; `and` / `or` shortcuts too)
 57 | * ML-style side effects (but only used for IO, which is then wrapped into a monad)
 58 | * only simple pattern matching + default branch (TODO: nested patterns)
 59 | * no infix operators
 60 | * list construction syntax `[a,b,c]` is supported
 61 | * no indentation syntax (only curly braces), except for top-level blocks
 62 | * only line comments, starting at the beginning of the line
 63 | * built-in data types: `Int`, `Char`, `Bool`, `List`, `Maybe`, etc - those required by the primops
 64 | * universal polymorphic equality comparison primop (?)
 65 | * no escaping in character / string constants (TODO: maybe it's worth to do escaping?)
 66 | * basic IO: standard input / output, basic file handling, early exit, command line arguments 
 67 | 
 68 | We can make the same source files to be accepted by both GHC and
 69 | itself by recognizing and ignoring GHC-specific lines (pragmas, imports,
 70 | type signatures, datatype declarations, type synonyms). We just put
 71 | primops implementations into a PrimGHC module (as imports are ignored).
 72 | 
 73 | We could in theory have several backends:
 74 | 
 75 | * C99 on 64 bit architectures
 76 | * TODO: x86-64 assembly
 77 | * TODO: some simple bytecode virtual machine
 78 | 
 79 | For bootstrapping philosophy it seems to be useful to have a very simple virtual 
 80 | machine, for which an interpreter can be very easily written in C or any other 
 81 | language.
 82 | 
 83 | 
 84 | Compilation pipeline
 85 | --------------------
 86 | 
 87 | 1. lexer
 88 | 2. parser
 89 | 3. partition recursive lets using dependency analysis
 90 | 4. recognize primops
 91 | 5. TODO: eliminate pattern matching into simple branching on constructors
 92 | 6. collect data constructors
 93 | 7. scope checking & conversion to core language
 94 | 8. inline small functions + beta reduce + eliminate unused lets
 95 | 9. closure conversion
 96 | 10. TODO: compile to some low-level intermediate language
 97 | 11. final code generation (TODO: different backends)
 98 | 
 99 | 
100 | Runtime system
101 | --------------
102 | 
103 | There is an "enviroment" stack separate from the CPU stack. This makes it
104 | very easy to find GC roots: just walk through the stack. The stack contains
105 | pointers to the heap (with the optimization that small heap objects, fitting
106 | into 61 bits, are not actually allocated).
107 | 
108 | On the heap there are only two kind of objects, closures and data constructors:
109 |  
110 | * data constructor (needs: tag + arity)
111 | * closure / partial application (needs: static function pointer / index, 
112 |   number of applied arguments, number of remaining arguments)
113 | 
114 | Heap pointers are also tagged:
115 | 
116 | * normal heap pointer (closure or data constructor)
117 | * 61 bit literals (int / char)
118 | * nullary constructors
119 | * static function pointers
120 | * foreign pointers (used for file handles in the C runtime)
121 | 
122 | There are no thunks on the heap because we are strict.
123 | 
124 | The garbage collector is a very simple copying (compacting) GC.
125 | 
126 | 
127 | Implementation details
128 | ----------------------
129 | 
130 | There are some minor tricks you should be aware of if you try to read the code.
131 | 
132 | ### Argument order
133 | 
134 | The order of function arguments on the stack, the captured variables in closures 
135 | and also the order of constructor arguments on heap are all reversed compared to 
136 | the "logical" (source code) order. This makes the implementation of application
137 | much simpler.
138 | 
139 |     [ Cons_tag    argN ... arg2 arg1 ]                   # data constructor heap object
140 |     [ Closure_tag envK ... env2 env1 ]                   # closure heap object 
141 |     [ ... | argN ... arg1 envK ... env1 | undefined ]    # stack when calling a static function
142 |           ^ BP                          ^ SP
143 | 
144 | Note: our stack grows "upwards" (unlike the CPU stack which grows "downwards").
145 | 
146 | ### IO monad
147 | 
148 | There is an IO monad, which in the GHC runtime and the interpreted runtime is
149 | the host's IO monad, while in the compiled code it is encoded with functions
150 | having side effects:
151 | 
152 |     type IO a = ActionToken -> a
153 | 
154 | You need to begin your `main` function with an explicit `runIO` call (this is
155 | useful while debugging, as main can be just a simple expression instead).
156 | 
157 | 
158 | Organization of source code
159 | ---------------------------
160 | 
161 |     Base.hs          - base library / prelude
162 |     Closure.hs       - closure conversion
163 |     CodeGen.hs       - code generation
164 |     Containers.hs    - container data structures 
165 |     Core.hs          - core language
166 |     DataCon.hs       - data constructors
167 |     Dependency.hs    - reordering lets using the dependency graph
168 |     Eval.hs          - interpreter
169 |     Inliner.hs       - inliner + basic optimizations
170 |     Nano.hs          - main executable
171 |     Parser.hs        - parser
172 |     PrimGHC.hs       - the primops implemented in Haskell (so that GHC can host it) 
173 |     PrimOps.hs       - primops
174 |     ScopeCheck.hs    - scope checking + conversion to core
175 |     Syntax.hs        - surface syntax
176 |     Types.hs         - common types
177 |     rts.c            - the runtime system implemented in C
178 |     bootstrap.sh     - shell script to bootstrap the compiler
179 |     sloc_count.sh    - shell script to measure source code size
180 |  
181 | 


--------------------------------------------------------------------------------
/ScopeCheck.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Scope checking and conversion to core
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module ScopeCheck where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | 
 18 | import Base
 19 | import Containers
 20 | import Types
 21 | import PrimOps
 22 | import DataCon
 23 | import Syntax
 24 | import Core
 25 | 
 26 | {-% include "Base.hs"       %-}
 27 | {-% include "Containers.hs" %-}
 28 | {-% include "Types.hs"      %-}
 29 | {-% include "PrimOps.hs"    %-}
 30 | {-% include "DataCon.hs"    %-}
 31 | {-% include "Syntax.hs"     %-}
 32 | {-% include "Core.hs"       %-}
 33 | 
 34 | --------------------------------------------------------------------------------
 35 | -- * Config
 36 | 
 37 | -- | Name of the entry point
 38 | nanoMainIs :: Name
 39 | nanoMainIs = "main"
 40 | 
 41 | --------------------------------------------------------------------------------
 42 | -- * conversion to core
 43 |    
 44 | exprToCore :: Trie PrimOp -> DataConTable -> Scope -> Expr -> Term
 45 | exprToCore primops dconTable iniScope expr = scopeCheck dconTable iniScope (recogPrimApps1 primops expr)
 46 | 
 47 | programToCoreProgram :: Mode -> DataConTable -> Program Expr -> CoreProgram
 48 | programToCoreProgram mode dconTable blocks = CorePrg (map worker blocks) mainIdx mainTerm where
 49 |   { primops      = thePrimOps mode
 50 |   ; duplicate n  = concat [ "multiple declaration of " , quoteString n ]
 51 |   ; defins_      = map located (forgetBlockStructure blocks)
 52 |   ; topLevScope  = trieFromListUnique duplicate (zipWithIndex (\n i -> Pair n (TopL i)) (map definedName defins_))
 53 |   ; worker block = case block of 
 54 |       { NonRecursive defin  -> NonRecursive (     fmapLDefin (exprToCore primops dconTable topLevScope)  defin )
 55 |       ; Recursive    defins -> Recursive    (map (fmapLDefin (exprToCore primops dconTable topLevScope)) defins) }
 56 |   ; no_main_err = \_ -> error (concat [ "entry point " , quoteString nanoMainIs , " not found" ]) 
 57 |   ; mainIdx = case trieLookup nanoMainIs topLevScope of { Just varl -> case varl of
 58 |       { TopL k -> k ; _ -> no_main_err Unit } ; _ -> no_main_err Unit }  
 59 |   ; mainTerm = AtmT (VarA (Named nanoMainIs (TopV mainIdx))) }
 60 | 
 61 | --------------------------------------------------------------------------------
 62 | -- ** Scope checking
 63 | 
 64 | data VarL 
 65 |   = LevL Level 
 66 |   | TopL Int
 67 |   deriving Show
 68 | 
 69 | type Scope = Trie VarL
 70 | 
 71 | lookupVar :: Level -> Scope -> Location -> String -> Var
 72 | lookupVar level scope loc name =
 73 |   case trieLookup name scope of 
 74 |     { Just v  -> case v of { LevL k -> IdxV (dec (minus level k)) ; TopL j -> TopV j } 
 75 |     ; Nothing -> error (concat [ "variable name " , quoteString name , " not in scope, at " , showLocation loc ])}
 76 | 
 77 | lookupCon :: DataConTable -> String -> Con
 78 | lookupCon dcontable name =
 79 |   case trieLookup name dcontable of { Just con -> con ; Nothing ->
 80 |     error (concat [ "fatal error: constructor " , quoteString name , " not found" ])}
 81 | 
 82 | scopeCheck :: DataConTable -> Scope -> Expr -> Term
 83 | scopeCheck dcontable = go 0 where
 84 |   { mbAtom level scope expr =  case expr of
 85 |       { VarE  lname -> case lname of { Located loc name -> case isDCon name of
 86 |         { True  -> Just (ConA (Named name (lookupCon dcontable       name)))
 87 |         ; False -> Just (VarA (Named name (lookupVar level scope loc name))) }}
 88 |       ; LitE  lit -> case lit of
 89 |         { StrL cs -> Nothing
 90 |         ; _       -> Just (KstA lit) }
 91 |       ; _ -> Nothing } 
 92 |   ; go level scope expr = case expr of
 93 |     { VarE  name -> case mbAtom level scope expr of
 94 |         { Just atom -> AtmT atom
 95 |         ; Nothing   -> error "fatal error: VarE should be always an atom!" }
 96 |     ; StrE j     -> AtmT (VarA (Named (appendInt "str_" j) (StrV j)))
 97 |     ; AppE e1 e2 -> case mbAtom level scope e2 of
 98 |         { Just atom -> AppT (go level scope e1) atom
 99 |         ; Nothing   -> LetT (Named "letx" (go level scope e2)) (AppT (go (inc level) scope e1) (VarA (Named "letx" (IdxV 0)))) }
100 |     ; LamE  name body -> LamT (Named name (go (inc level) (trieInsert name (LevL level) scope) body))
101 |     ; LetE  defs body -> case defs of { Nil -> go level scope body ; Cons defin rest -> case located defin of 
102 |         { Defin name rhs -> let { tm = go level scope rhs ; scope' = trieInsert name (LevL level) scope }
103 |                             in  LetT (Named name tm) (go (inc level) scope' (LetE rest body)) } }
104 |     ; RecE  defs body -> let { n = length defs ; level' = plus level n
105 |         ; f scp nameidx = case nameidx of { Pair name j -> trieInsert name (LevL j) scp }
106 |         ; scope' = foldl f scope (zip (map ldefinedName defs) (rangeFrom level n))
107 |         } in RecT n (map (goDefin level' scope') defs) (go level' scope' body)
108 |     ; CaseE lwhat brs -> case lwhat of { Located loc what -> case mbAtom level scope what of
109 |         { Just atom -> goCase level scope loc atom brs
110 |         ; Nothing   -> LetT (Named "scrut" (go level scope what)) (goCase (inc level) scope loc (VarA (Named "scrut" (IdxV 0))) brs) }}
111 |     ; LitE  lit -> case lit of
112 |         { StrL cs -> go level scope (ListE (map (\x -> LitE (ChrL x)) cs))
113 |         ; _       -> AtmT (KstA lit) }
114 |     ; ListE exprs -> case exprs of 
115 |         { Nil       -> AtmT (ConA (Named "Nil" con_Nil))
116 |         ; Cons e es -> go level scope (AppE (AppE (VarE (fakeLocated "Cons")) e) (ListE es)) }
117 |     ; PrimE prim args -> case prim of { PrimOp _arity pri -> case isLazyPrim pri of 
118 |         { False -> goPrim prim 0 level scope Nil args 
119 |       --  { False -> goArgs level scope args (PriT prim 
120 |       --      (zipWithIndex (\i j -> VarA (Named (appendInt "name" j) (IdxV i))) (reverse (range (length args))) ))
121 |         ; True  -> LZPT prim (map (go level scope) args) }}
122 |     }
123 |   -- ; finishPrim :: PrimOp -> List (Either Term Atom) -> Int -> Term 
124 |   ; finishPrim prim theEis ofs = let 
125 |       { theVars = zipWithIndex (\i j -> VarA (Named (appendInt "parg" j) (IdxV i))) (reverse (range ofs)) 
126 |       ; nameIt var x = case var of { VarA named -> case named of { Named n _ -> Named n x }} 
127 |       ; worker eis vars atoms = case eis of { Nil -> PriT prim (reverse atoms) ; Cons ei eis' -> case ei of
128 |           { Right atom -> worker eis' vars (Cons (shiftAtomRight ofs atom) atoms) 
129 |           ; Left  term -> case vars of { Cons var vars' -> LetT (nameIt var term) (worker eis' vars' (Cons var atoms)) }}}
130 |       } in worker theEis theVars Nil
131 |   -- ; goPrim :: PrimOp -> Int -> Level -> Scope -> List (Either Term Atom) -> List Expr -> Term 
132 |   ; goPrim prim ofs level scope newargs oldargs = case oldargs of 
133 |       { Nil            -> finishPrim prim (reverse newargs) ofs 
134 |       ; Cons this rest -> case mbAtom (minus level ofs) scope this of 
135 |         { Just atom -> goPrim prim      ofs       level  scope (Cons (Right atom                 ) newargs) rest 
136 |         ; Nothing   -> goPrim prim (inc ofs) (inc level) scope (Cons (Left  (go level scope this)) newargs) rest }}
137 |   -- ; goArgs level scope args body = case args of 
138 |   --     { Nil            -> body 
139 |   --     ; Cons this rest -> LetT (go level scope this) (goArgs (inc level) scope rest body) }
140 |   ; goDefin level scope defin = case located defin of { Defin name what -> Named name (go level scope what) }
141 |   ; goCase level scope loc var branches = CasT (Located loc var) (map goBranch branches) where
142 |     { goBranch branch = case branch of
143 |         { DefaultE         rhs -> DefaultT (go level scope rhs)
144 |         ; BranchE con args rhs -> let { n = length args ; level' = plus level n
145 |           ; f scp nameidx = case nameidx of { Pair name j -> trieInsert name (LevL j) scp }
146 |           ; scope' = foldl f scope (zip args (rangeFrom level n))
147 |           } in BranchT (Named con (lookupCon dcontable con)) n (go level' scope' rhs) } } }
148 | 
149 | --------------------------------------------------------------------------------
150 | 


--------------------------------------------------------------------------------
/Syntax.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Surface syntax
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module Syntax where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | 
 18 | import Base
 19 | import Containers
 20 | import Types
 21 | -- import Core
 22 | import PrimOps
 23 | 
 24 | {-% include "Base.hs"        %-}
 25 | {-% include "Containers.hs"  %-}
 26 | {-% include "Types.hs"       %-}
 27 | -- {-% include "Core.hs"        %-}
 28 | {-% include "PrimOps.hs"     %-}
 29 | 
 30 | --------------------------------------------------------------------------------
 31 | -- * Surface syntax
 32 | 
 33 | type DefinE  = Defin  Expr
 34 | type LDefinE = LDefin Expr
 35 | 
 36 | -- | We \"parse\" (well, recognize) type declarations, data declarations,
 37 | -- type synonyms and imports, but we ignore them; this is simply so that the
 38 | -- this source code can be a valid Haskell program and self-hosting at the
 39 | -- same time.
 40 | data TopLevel
 41 |   = TopDefin    LDefinE
 42 |   | TopTyDecl   Name (List Token)
 43 |   | TopDataDecl (List Token)
 44 |   | TopTyAlias  (List Token)
 45 |   | TopImport   (List Token)
 46 |   | TopInclude  FilePath
 47 |   | TopModule   (List Token)
 48 |   deriving Show
 49 | 
 50 | filterIncludes :: List TopLevel -> List FilePath
 51 | filterIncludes = go where { go ls = case ls of { Nil -> Nil ; Cons this rest ->
 52 |   case this of { TopInclude fn -> Cons fn (go rest) ; _ -> go rest }}}
 53 | 
 54 | mbDefin :: TopLevel -> Maybe LDefinE
 55 | mbDefin toplev = case toplev of { TopDefin def -> Just def ; _ -> Nothing }
 56 | 
 57 | --------------------------------------------------------------------------------
 58 | 
 59 | type LExpr = Located Expr
 60 | 
 61 | data Expr
 62 |   = VarE  LName
 63 |   | AppE  Expr Expr
 64 |   | LamE  Name Expr
 65 |   | LetE  (List LDefinE) Expr
 66 |   | RecE  (List LDefinE) Expr
 67 |   | CaseE LExpr (List BranchE)
 68 |   | LitE  Literal
 69 |   | ListE (List Expr)
 70 |   | PrimE PrimOp (List Expr)
 71 |   | StrE  Int
 72 |   | MainE
 73 |   deriving Show
 74 | 
 75 | data BranchE
 76 |   = BranchE Name (List Name) Expr
 77 |   | DefaultE Expr
 78 |   deriving Show
 79 | 
 80 | -- data BranchE
 81 | --   = BranchE Pattern Expr
 82 | --   deriving Show
 83 | 
 84 | isLambdaExpr :: Expr -> Bool
 85 | isLambdaExpr expr = case expr of { LamE _ _ -> True ; _ -> False }
 86 | 
 87 | lamsE :: List Name -> Expr -> Expr
 88 | lamsE args body = case args of { Nil -> body ; Cons v vs -> LamE v (lamsE vs body) }
 89 | 
 90 | appsE :: Expr -> List Expr -> Expr
 91 | appsE fun  args = case args of { Nil -> fun  ; Cons t ts -> appsE (AppE fun t) ts  }
 92 | 
 93 | listAppsE :: List Expr -> Expr
 94 | listAppsE  list = case list of { Cons f args -> appsE f args ; Nil -> error "listAppsE" }
 95 | 
 96 | --------------------------------------------------------------------------------
 97 | -- ** TODO: Pattern compiler 
 98 | 
 99 | -- SimP  = simple pattern (we compile down to this)
100 | -- AppP  = constructor pattern
101 | -- VarP  = variable pattern
102 | -- WildP = wildcard patterns
103 | -- ListP = list pattern
104 | data Pattern
105 |   = SimP Name (List Name)      
106 |   | AppP Name (List Pattern)   
107 |   | VarP Name                  
108 |   | WildP                      
109 |   | ListP (List Pattern)       
110 |   deriving Show
111 | 
112 | patternHead :: Pattern -> Maybe Name
113 | patternHead pat = case pat of
114 |   { SimP con _list -> Just con
115 |   ; AppP con _list -> Just con
116 |   ; _              -> Nothing }
117 | 
118 | -- -- | We translate complex pattern into iterated matching of simple patterns
119 | -- patternCompiler :: List BranchE -> List BranchE
120 | -- patternCompiler
121 | 
122 | --------------------------------------------------------------------------------
123 | -- ** free and bound variables
124 | 
125 | -- | Free variables in an expression
126 | exprFreeVars :: Expr -> TrieSet
127 | exprFreeVars expr = go trieEmpty expr where
128 |   { go bound expr = case expr of
129 |     { VarE lname     -> let { nam = located lname } in case trieMember nam bound of { False -> trieSetSingleton nam ; _ -> trieEmpty }
130 |     ; AppE e1 e2     -> trieUnion (go bound e1) (go bound e2)
131 |     ; LamE v body    -> go (trieSetInsert v bound) body
132 |     ; LetE defs body -> goLets   bound (map located defs) body
133 |     ; RecE defs body -> goLetRec bound (map located defs) body
134 |     ; CaseE what brs -> trieUnion (go bound (located what)) (trieUnions (map (goBranch bound) brs))
135 |     ; LitE  _        -> trieEmpty
136 |     ; ListE list     -> trieUnions (map (go bound) list)
137 |     ; PrimE p list   -> trieUnions (map (go bound) list)
138 |     ; StrE  _        -> trieEmpty }
139 |   ; goBranch bound branch = case branch of
140 |     { DefaultE rhs     -> go bound rhs 
141 |     ; BranchE _ ns rhs -> let { bound' = trieUnion (trieSetFromList ns) bound } in go bound' rhs }
142 |   ; goLets bound defs body = case defs of { Nil -> go bound body ; Cons def rest -> case def of { Defin nam rhs ->
143 |       let { bound' = trieSetInsert nam bound } in trieUnion (go bound rhs) (goLets bound' rest body) } }
144 |   ; goLetRec bound defs body = let 
145 |       { bound' = trieUnion (trieSetFromList (map definedName defs)) bound 
146 |       ; stuff = map (\defin -> go bound' (definedWhat defin)) defs
147 |       } in trieUnion (trieUnions stuff) (go bound' body) }
148 | 
149 | --------------------------------------------------------------------------------
150 | -- ** recognize PrimOps
151 | 
152 | -- | From @((f x) y) z@ we create the list [f,x,y,z]
153 | recogAppsE :: Expr -> List Expr
154 | recogAppsE = compose reverse go where
155 |   { go expr = case expr of { AppE f x -> Cons x (go f)  ; _ -> singleton expr } }
156 | 
157 | -- | temporary argument names for saturating primops. Note that all primops have at most 3 arguments
158 | tmpVars :: List Name
159 | tmpVars = ["x1","x2","x3","x4","x5"]
160 | -- tmpVars = map (\i -> append "x" (showInt i)) (rangeFrom 1 5)
161 | 
162 | -- | Saturate primop application
163 | saturatePrimApp :: Location -> PrimOp -> List Expr -> Expr
164 | saturatePrimApp loc primop args = case primop of { PrimOp arity prim -> case compare nargs arity of
165 |   { EQ ->        PrimE primop args
166 |   ; GT -> appsE (PrimE primop (take arity args)) (drop arity args)
167 |   ; LT -> let { vars = take (minus arity nargs) tmpVars }
168 |           in  lamsE vars (PrimE primop (append args (map (\v -> VarE (Located loc v)) vars)))
169 |   } }
170 |   where { nargs = length args }
171 | 
172 | -- | Recognize primop applications, and saturate them if necessary
173 | recogPrimApps :: Mode -> List DefinE -> List DefinE
174 | recogPrimApps mode prg = map (fmapDefin (recogPrimApps1 primops)) prg where { primops = thePrimOps mode }
175 | 
176 | -- | Recognize primop applications, and saturate them if necessary
177 | recogPrimApps1 :: Trie PrimOp -> Expr -> Expr
178 | recogPrimApps1 primops = go where
179 |   { goVar lname = case trieLookup (located lname) primops of
180 |       { Nothing        -> VarE lname
181 |       ; Just primop    -> saturatePrimApp (location lname) primop [] }
182 |   ; go expr = case expr of
183 |       { VarE lname      -> goVar lname
184 |       ; AppE _ _       -> case recogAppsE expr of { Cons f args -> case f of
185 |           { VarE ln      -> case trieLookup (located ln) primops of
186 |               { Nothing     -> appsE (VarE ln)                      (map go args)
187 |               ; Just primop -> saturatePrimApp (location ln) primop (map go args) }
188 |           ; _            -> appsE (go    f) (map go args) } }
189 |       ; LamE arg  body -> LamE  arg  (go body)
190 |       ; LetE defs body -> LetE  (map goDefin defs) (go body)
191 |       ; RecE defs body -> RecE  (map goDefin defs) (go body)
192 |       ; CaseE what brs -> CaseE (lfmap go what) (map goBranch brs)
193 |       ; ListE exprs    -> ListE (map go exprs)
194 |       ; _              -> expr }
195 |   ; goBranch branch = case branch of
196 |       { BranchE con args rhs -> BranchE con args (go rhs)
197 |       ; DefaultE         rhs -> DefaultE         (go rhs) }
198 |   ; goDefin ldefin = case ldefin of { Located loc defin -> case defin of
199 |       { Defin name expr -> Located loc (Defin name (go expr)) } } }
200 | 
201 | --------------------------------------------------------------------------------
202 | -- * extract string constants
203 | 
204 | type StrState  = Pair Int (List String)
205 | type Stringy a = State StrState a
206 | 
207 | addString :: String -> Stringy Int
208 | addString what = sbind sget (\pair -> case pair of { Pair n list -> 
209 |                  sbind (sput (Pair (inc n) (Cons what list))) (\_ -> sreturn n) })
210 | 
211 | extractStringConstants :: List LDefinE -> Pair (List String) (List LDefinE)
212 | extractStringConstants program = case runState (smapM worker program) (Pair 0 Nil) of
213 |   { Pair fstate prg' -> Pair (reverse (snd fstate)) prg' } 
214 |   where { worker ldefin = case ldefin of { Located loc defin -> case defin of 
215 |     { Defin name rhs -> sfmap (\x -> Located loc (Defin name x)) (extractStringConstants1 rhs) }}}
216 | 
217 | extractStringConstants1 :: Expr -> Stringy Expr 
218 | extractStringConstants1 expr = go expr where
219 |   { go expr = case expr of
220 |     { LitE  lit      -> case lit of { StrL str  -> sfmap StrE (addString str) ; _ -> sreturn expr }
221 |     ; VarE  _        -> sreturn expr
222 |     ; AppE  e1 e2    -> sliftA2 AppE (go e1) (go e2)
223 |     ; LamE  n body   -> sfmap  (LamE n) (go body)
224 |     ; LetE  defs rhs -> sliftA2 LetE  (smapM goDefin defs) (go rhs)
225 |     ; RecE  defs rhs -> sliftA2 RecE  (smapM goDefin defs) (go rhs)
226 |     ; CaseE what brs -> sliftA2 CaseE (lgo what) (smapM goBranch brs)
227 |     ; ListE ls       -> sfmap   ListE (smapM go ls)
228 |     ; PrimE pri args -> sfmap  (PrimE pri) (smapM go args) }
229 |   ; goDefin  ldefin = case ldefin of { Located loc defin -> case defin of 
230 |       { Defin name rhs -> sfmap (\e -> Located loc (Defin name e)) (go rhs) }}
231 |   ; goBranch branch = case branch of 
232 |     { BranchE con args rhs -> sfmap (BranchE con args) (go rhs)
233 |     ; DefaultE         rhs -> sfmap (DefaultE        ) (go rhs) } 
234 |   ; lgo lexpr = case lexpr of { Located loc expr -> sfmap (Located loc) (go expr) } }
235 | 
236 | --------------------------------------------------------------------------------
237 | 
238 | 
239 | 


--------------------------------------------------------------------------------
/Types.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | -- | Common types and functions
  3 | 
  4 | {-# LANGUAGE NoImplicitPrelude, MagicHash #-}
  5 | {-# LANGUAGE Strict #-}
  6 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  7 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  8 | 
  9 | module Types where
 10 | 
 11 | --------------------------------------------------------------------------------
 12 | 
 13 | import Prelude ( Int , Char , Eq , Show )
 14 | import PrimGHC
 15 | 
 16 | --------------------------------------------------------------------------------
 17 | 
 18 | import Base
 19 | import Containers
 20 | 
 21 | {-% include "Base.hs"        %-}
 22 | {-% include "Containers.hs"  %-}
 23 | 
 24 | --------------------------------------------------------------------------------
 25 | -- * Some common types
 26 | 
 27 | -- | Names
 28 | type Name = String
 29 | 
 30 | -- | Arity
 31 | type Arity = Int
 32 | 
 33 | -- | De Bruijn level
 34 | type Level = Int
 35 | 
 36 | -- | De Bruijn index
 37 | type Idx = Int
 38 | 
 39 | -- | Constructor index
 40 | type Con = Int
 41 | 
 42 | -- | Size
 43 | type Size = Int
 44 | 
 45 | -- | Top-level index
 46 | type TopIdx = Int
 47 | 
 48 | -- | Static function index
 49 | type Static = Int
 50 | 
 51 | -- | Mapping constructor names to constructor tags
 52 | type DataConTable = Trie Con
 53 | 
 54 | -- | Are we compiling or interpreting? This is relevant with primops, 
 55 | -- where the two behaves differently...
 56 | data Mode
 57 |   = Compile
 58 |   | Interpret
 59 |   deriving Show
 60 | 
 61 | -- | Sometimes we need some fake argument for recursive definitions... (?!?)
 62 | data Fake = Fake
 63 | 
 64 | --------------------------------------------------------------------------------
 65 | -- ** Named things
 66 | 
 67 | -- | We want to keep names for debugging \/ pretty printing
 68 | data Named a = Named Name a deriving Show
 69 | 
 70 | nfmap :: (a -> b) -> Named a -> Named b
 71 | nfmap f named = case named of { Named name x -> Named name (f x) }
 72 | 
 73 | forgetName :: Named a -> a
 74 | forgetName x = case x of { Named _ y -> y }
 75 | 
 76 | nameOf :: Named a -> String
 77 | nameOf x = case x of { Named n _ -> n }
 78 | 
 79 | --------------------------------------------------------------------------------
 80 | -- ** Definitions
 81 | 
 82 | -- | Definitions
 83 | data Defin a = Defin Name a deriving Show
 84 | 
 85 | fmapDefin :: (a -> b) -> Defin a -> Defin b
 86 | fmapDefin f defin = case defin of { Defin n x -> Defin n (f x) }
 87 | 
 88 | definedName :: Defin a -> Name
 89 | definedName defin = case defin of { Defin n _ -> n }
 90 | 
 91 | definedWhat :: Defin a -> a
 92 | definedWhat defin = case defin of { Defin _ e -> e }
 93 | 
 94 | definToPair :: Defin a -> Pair Name a
 95 | definToPair def = case def of { Defin n rhs -> Pair n rhs }
 96 | 
 97 | definToNamed :: Defin a -> Named a
 98 | definToNamed def = case def of { Defin n rhs -> Named n rhs }
 99 | 
100 | namedToDefin :: Named a -> Defin a
101 | namedToDefin named = case named of { Named n x -> Defin n x }
102 | 
103 | ldefinToPair :: LDefin a -> Pair Name (Located a)
104 | ldefinToPair ldef = case ldef of { Located loc def -> case def of { Defin n rhs -> Pair n (Located loc rhs) } }
105 | 
106 | --------------------------------------------------------------------------------
107 | 
108 | type LDefin a = Located (Defin a)
109 | 
110 | fmapLDefin :: (a -> b) -> LDefin a -> LDefin b
111 | fmapLDefin f = lfmap (fmapDefin f)
112 | 
113 | ldefinedName :: LDefin a -> Name
114 | ldefinedName = compose definedName located
115 | 
116 | nameAndLoc :: LDefin a -> Pair Name Location
117 | nameAndLoc ldefin = case ldefin of { Located loc defin -> case defin of { Defin name _ -> Pair name loc }}
118 | 
119 | showNameAndLoc :: LDefin a -> String
120 | showNameAndLoc ldefin = case nameAndLoc ldefin of { Pair name loc -> append3 (quoteString name) " at " (showLocation loc) }
121 | 
122 | --------------------------------------------------------------------------------
123 | -- * Programs
124 | 
125 | -- | We partition our programs into non-recursive definitions and mutually recursive blocks
126 | data Block a
127 |   = NonRecursive       (LDefin a)
128 |   | Recursive    (List (LDefin a))
129 |   deriving Show
130 | 
131 | type Program a = List (Block a)
132 | 
133 | forgetBlockStructure :: Program a -> List (LDefin a)
134 | forgetBlockStructure prg = go prg where
135 |   { go blocks = case blocks of { Nil -> Nil ; Cons this rest -> case this of
136 |     { NonRecursive defin  -> Cons defin    (go rest) 
137 |     ; Recursive    defins -> append defins (go rest) } } }
138 | 
139 | --------------------------------------------------------------------------------
140 | -- ** Literals
141 | 
142 | data Literal
143 |   = IntL Int
144 |   | ChrL Char
145 |   | StrL String
146 |   deriving (Eq,Show)
147 | 
148 | showLiteral :: Literal -> String
149 | showLiteral lit = case lit of
150 |   { IntL n -> showInt  n
151 |   ; ChrL c -> showChar c
152 |   ; StrL s -> doubleQuoteString s }
153 | 
154 | --------------------------------------------------------------------------------
155 | -- ** Variables
156 | 
157 | -- | Variables can be a de Bruijn index, or level, or a top-level definition, or a static string index
158 | data Var
159 |   = IdxV Idx
160 |   | LevV Level
161 |   | TopV Static
162 |   | StrV Int
163 |   deriving Show
164 | 
165 | prettyVar :: Var -> String
166 | prettyVar var = case var of 
167 |   { IdxV i -> concat [ "$" , showInt i ] 
168 |   ; LevV j -> concat [ "#" , showInt j ] 
169 |   ; TopV k -> concat [ "statfun(" , showInt k , ")" ] 
170 |   ; StrV m -> concat [ "str<"     , showInt m , ">" ]}
171 | 
172 | --------------------------------------------------------------------------------
173 | -- ** Atoms
174 | 
175 | -- | Things which can be applied, case-branched, passed to primops
176 | data Atom
177 |   = VarA (Named Var)
178 |   | ConA (Named Con)
179 |   | KstA Literal
180 |   deriving Show
181 | 
182 | prettyAtom :: Atom -> String
183 | prettyAtom atom = case atom of
184 |   { VarA nvar -> append (nameOf nvar) (prettyVar (forgetName nvar))
185 |   ; KstA lit  -> showLiteral lit 
186 |   ; ConA ncon -> nameOf ncon }
187 | --  ; ConA ncon -> case ncon of { Named name con -> append3 name ":" (showNat con) }
188 | 
189 | --------------------------------------------------------------------------------
190 | -- ** Source positions and locations
191 | 
192 | -- | @SrcPos row col@; starting from (1,1)
193 | data SrcPos = SrcPos Int Int deriving Show
194 | 
195 | startSrcPos :: SrcPos
196 | startSrcPos = SrcPos 1 1
197 | 
198 | startCol :: SrcPos -> SrcPos
199 | startCol pos = case pos of { SrcPos row col -> SrcPos row 1 }
200 | 
201 | nextCol :: SrcPos -> SrcPos
202 | nextCol pos = case pos of { SrcPos row col -> SrcPos row (inc col) }
203 | 
204 | nextRow :: SrcPos -> SrcPos
205 | nextRow pos = case pos of { SrcPos row col -> SrcPos (inc row) 1 }
206 | 
207 | nextSrcPos :: Char -> SrcPos -> SrcPos
208 | nextSrcPos ch pos
209 |   = ifte (ceq ch newlineC       ) (nextRow  pos)
210 |   ( ifte (ceq ch carriageReturnC) (startCol pos) (nextCol pos) )
211 | 
212 | showSrcPos :: SrcPos -> String
213 | showSrcPos pos = case pos of { SrcPos row col ->
214 |   append ("line ") (append3 (showNat row) (", column ") (showNat col)) }
215 | 
216 | showSrcPos_ :: SrcPos -> String
217 | showSrcPos_ pos = case pos of { SrcPos row col -> (append3 (showNat row) ":" (showNat col)) }
218 | 
219 | showSrcPos' :: FilePath -> SrcPos -> String
220 | showSrcPos' fname pos = append3 "file " (doubleQuoteString fname) (append ", " (showSrcPos pos))
221 | 
222 | showLocation :: Location -> String
223 | showLocation loc = case loc of { Loc fname pos1 pos2 -> concat
224 |   [ "file " , doubleQuoteString fname , ", " , showSrcPos_ pos1 , "--" , showSrcPos_ pos2 ] }
225 | 
226 | -- | Note: For stringy code-gen, we have to escape double quotes, because the became string literals
227 | escapedShowLocation :: Location -> String
228 | escapedShowLocation loc = case loc of { Loc fname pos1 pos2 -> concat
229 |   [ "file " , escapedDoubleQuoteString fname , ", " , showSrcPos_ pos1 , "--" , showSrcPos_ pos2 ] }
230 | 
231 | data Location  = Loc FilePath SrcPos SrcPos deriving Show
232 | data Located a = Located Location a         deriving Show
233 | 
234 | type LName     = Located Name
235 | type LAtom     = Located Atom
236 | 
237 | lfmap :: (a -> b) -> Located a -> Located b
238 | lfmap f located = case located of { Located loc x -> Located loc (f x) }
239 | 
240 | locFn    loc = case loc of { Loc fn _ _    -> fn   }
241 | locStart loc = case loc of { Loc _  pos1 _ -> pos1 }
242 | locEnd   loc = case loc of { Loc _  _ pos2 -> pos2 }
243 | 
244 | location lx = case lx of { Located loc _ -> loc }
245 | located  lx = case lx of { Located _   x -> x   }
246 | 
247 | locatedStart = compose locStart location
248 | locatedEnd   = compose locEnd   location
249 | 
250 | fakeLocation  = Loc "<source>" (SrcPos 0 0) (SrcPos 0 0)
251 | fakeLocated x = Located fakeLocation x
252 | 
253 | --------------------------------------------------------------------------------
254 | -- ** Tokens
255 | 
256 | data Special
257 |   = LParen | RParen | LBrace | RBrace | LBracket | RBracket | Dot
258 |   | Comma | Semicolon | EqualSign | Lambda | Pipe | Arrow | DArrow | HasType
259 |   deriving (Eq,Show)
260 | 
261 | data Token
262 |   = VarTok   Name
263 |   | LitTok   Literal
264 |   | SpecTok  Special
265 |   | WhiteTok
266 |   deriving (Eq,Show)
267 | 
268 | -- | Token wiht a location
269 | type LToken = Located Token
270 | 
271 | --------------------------------------------------------------------------------
272 | -- * matching on short lists
273 | 
274 | nullary :: List a ->                 b  -> b
275 | unary   :: List a -> (a ->           b) -> b
276 | binary  :: List a -> (a -> a ->      b) -> b
277 | ternary :: List a -> (a -> a -> a -> b) -> b
278 | 
279 | nullary args f = case args of { _         -> f               }
280 | unary   args f = case args of { Cons x xs -> f x             ; _ -> error "unary: not enough arguments"   }
281 | binary  args f = case args of { Cons x xs -> unary  xs (f x) ; _ -> error "binary: not enough arguments"  }
282 | ternary args f = case args of { Cons x xs -> binary xs (f x) ; _ -> error "ternary: not enough arguments" }
283 | 
284 | --------------------------------------------------------------------------------
285 | 


--------------------------------------------------------------------------------
/_main_old.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | {-# LANGUAGE NoImplicitPrelude, MagicHash  #-}
  3 | {-# LANGUAGE Strict, StrictData #-}
  4 | {-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
  5 | {-# LANGUAGE OverloadedStrings, OverloadedLists #-}
  6 | 
  7 | module Main where
  8 | 
  9 | --------------------------------------------------------------------------------
 10 | 
 11 | import Prelude ( Int , Char , Eq , Show(..) , (++) , (.) , ($)  )
 12 | import qualified Prelude
 13 | 
 14 | import qualified Control.Monad
 15 | 
 16 | import System.Environment
 17 | 
 18 | import PrimGHC
 19 | 
 20 | import Nano hiding ( main )
 21 | 
 22 | import Base 
 23 | import Containers
 24 | import Types
 25 | import PrimOps
 26 | import DataCon
 27 | import Syntax
 28 | import Parser
 29 | import Dependency
 30 | import Core
 31 | import ScopeCheck
 32 | import Inliner
 33 | import Closure
 34 | import CodeGen
 35 | 
 36 | import qualified Text.Show.Pretty as Pretty
 37 | 
 38 | --------------------------------------------------------------------------------
 39 | 
 40 | myPrettyTermDefin :: Prelude.String -> Defin Term -> Prelude.IO ()
 41 | myPrettyTermDefin prefix (Defin n t) = Prelude.putStrLn (Prelude.concat list) where
 42 |   list = [ prefix , _toGhcString n , " := " , _toGhcString (showTerm t) ] :: [Prelude.String]
 43 | 
 44 | myPrettyTermBlock :: Block Term -> Prelude.IO ()
 45 | myPrettyTermBlock (NonRecursive def) =                      myPrettyTermDefin "- "   def
 46 | myPrettyTermBlock (Recursive defs  ) = Control.Monad.mapM_ (myPrettyTermDefin "> ") (_toGhcList defs)
 47 | 
 48 | 
 49 | main = do
 50 |   Prelude.putStrLn "*** main.hs"
 51 |   args <- System.Environment.getArgs
 52 |   case args of
 53 |     [] -> error "usage: runghc main.hs <inputfile>"
 54 |     (fn:_) -> compile fn
 55 | 
 56 | -- type Files  = List FilePath
 57 | -- type Loaded = Pair Files (List TopLevel)
 58 | -- 
 59 | -- loadAndParseMany :: Files -> List FilePath -> IO Loaded
 60 | -- loadAndParseMany sofar fnames = case fnames of { Nil -> ioreturn (Pair sofar Nil) ; Cons this rest ->
 61 | --   iobind (loadAndParse1    sofar  this) (\loaded -> case loaded of { Pair sofar'  toplevs1 ->
 62 | --   iobind (loadAndParseMany sofar' rest) (\loaded -> case loaded of { Pair sofar'' toplevs2 ->
 63 | --   ioreturn (Pair sofar'' (append toplevs1 toplevs2)) }) }) }
 64 | -- 
 65 | -- loadAndParse1 :: Files -> FilePath -> IO Loaded
 66 | -- loadAndParse1 sofar fname = case stringMember fname sofar of
 67 | --   { True  -> ioreturn (Pair sofar Nil)
 68 | --   ; False -> iobind (readFile fname) (\text -> ioseq (putStrLn (append "+ " fname)) (let 
 69 | --       { blocks   = lexer fname text
 70 | --       ; toplevs  = map (parseTopLevelBlock fname) blocks
 71 | --       ; includes = filterIncludes toplevs
 72 | --       ; sofar'   = Cons fname sofar
 73 | --       } in iobind (loadAndParseMany sofar' includes) (\loaded -> case loaded of { Pair sofar'' toplevs2 -> 
 74 | --            ioreturn (Pair sofar'' (append toplevs toplevs2)) }))) }
 75 | 
 76 | compile fname = do
 77 |   let source = (_fromGhcString fname)
 78 | 
 79 | --  text <- Prelude.readFile fname
 80 | --
 81 | --  let blocks = (lexer source (_fromGhcString text))
 82 | --  Prelude.putStrLn ("number of top level blocks = " ++ show (length blocks))
 83 | --  -- Prelude.print (_toGhcList blocks)
 84 | --
 85 | --  let toplevs = map (parseTopLevelBlock source) blocks
 86 | ----  Prelude.putStrLn "\n----------------------------------\nSYNTAX BLOCKS"
 87 | ----  Control.Monad.mapM_ Prelude.print (_toGhcList toplevs)
 88 | 
 89 |   loaded <- runIO (loadAndParse1 Nil source)
 90 |   let toplevs = snd loaded
 91 | 
 92 |   let defins0 = catMaybes (map mbDefin toplevs)
 93 |   let Pair strlits defins1 = extractStringConstants defins0
 94 |   Prelude.putStrLn "\n----------------------------------\nTOPLEVEL DEFINS"
 95 |   Control.Monad.mapM_ Prelude.print (_toGhcList (recogPrimApps defins1))
 96 | 
 97 |   let dconTrie = collectDataCons defins1
 98 |   Prelude.putStrLn "\n----------------------------------\nCONSTRUCTORS"
 99 |   Control.Monad.mapM_ Prelude.print (_toGhcList (trieToList dconTrie))
100 | 
101 |   let blocks = reorderProgram defins1
102 |   Prelude.putStrLn "\n----------------------------------\nREORDERED TOPLEVEL DEFINS"
103 |   Control.Monad.mapM_ Prelude.print (_toGhcList blocks)
104 | 
105 |   let coreprg@(CorePrg coredefs mainIdx mainTerm) = programToCoreProgram dconTrie blocks
106 |   Prelude.putStrLn "\n----------------------------------\nCORE"
107 |   Control.Monad.mapM_ myPrettyTermBlock (_toGhcList coredefs)
108 |   Prelude.print (mainIdx,mainTerm)
109 | 
110 |   -- let coreprg'@(CorePrg coredefs' mainIdx' mainTerm') = inlineCorePrg 24 coreprg
111 |   let coreprg'@(CorePrg coredefs' mainIdx' mainTerm') = optimizeCorePrg coreprg
112 |   Prelude.putStrLn "\n----------------------------------\nOPTIMIZED CORE"
113 |   Control.Monad.mapM_ myPrettyTermBlock (_toGhcList coredefs')
114 |   Prelude.print (mainIdx',mainTerm')
115 | 
116 | 
117 |   let lprogram = coreProgramToLifted coreprg'
118 |       LProgram statfuns topidxs lmain = lprogram
119 |   Prelude.putStrLn "\n----------------------------------\nLIFTED"
120 |   Control.Monad.mapM_ Prelude.print (_toGhcList statfuns)
121 |   Prelude.print lmain
122 |   -- Prelude.print topidxs
123 | 
124 |   let code = runCodeGenM_ (liftedProgramToCode source strlits dconTrie lprogram)
125 |   -- Prelude.putStrLn "\n----------------------------------\nASM"
126 |   -- Control.Monad.mapM_ (Prelude.putStrLn . _toGhcString) (_toGhcList asm)
127 |   Prelude.writeFile "tmp.c" (Prelude.unlines $ Prelude.map _toGhcString $ _toGhcList code)
128 | 
129 |   -- let val = eval Nil core
130 |   -- Prelude.putStrLn "\n----------------------------------\nINTEPRETED RESULT"
131 |   -- Prelude.print (showValue val)
132 | 
133 | 


--------------------------------------------------------------------------------
/asm_shit/rts.asm:
--------------------------------------------------------------------------------
  1 | 
  2 | ;
  3 | ; RTS conventions
  4 | ; ===============
  5 | ; A) On the heap, we can have 1) closures/PAPs and 2) data constructors  
  6 | ;    these are distinguished by bit #2. We reserve bits #0/#1 for GC.
  7 | ;     * for closures, the second word is the function pointer, and the rest
  8 | ;       is the environment / partially applied arguments
  9 | ;     * for data constructors, the tag is simply followed by the arguments.
 10 | ; B) Heap pointers can be actual pointers or objects fitting in a machine word.
 11 | ;    Since heap pointers are aligned, we can use the low 3 bits to distinguish
 12 | ;    between these.:
 13 | ;     * low bits = 000 means an actual heap pointer
 14 | ;     * low bits = 001 means a 61 bit literal (int or char). 
 15 | ;     * low bits = 010 means a static function pointer
 16 | ;     * low bits = 100 means a nullary data constructor
 17 | ;    This means that static functions must be aligned too
 18 | ; C) We have a custom stack to store the enviroment. The stack pointer is RBP
 19 | ;    Arguments are always passed on this stack
 20 | ;
 21 | 
 22 | bits 64
 23 | global start 
 24 | 
 25 | section .bss
 26 | 
 27 | align 8
 28 | heap:  resb (1024*1024*128)   ; 128 mb
 29 | stack: resq (1024*1024)       ; 8 mb      
 30 | 
 31 | section .data
 32 | 
 33 | errmsg_app: db  "runtime error: invalid application",10
 34 | .len:       equ $ - errmsg_app
 35 | 
 36 | section .text
 37 | 
 38 | %define CON_False   0
 39 | %define CON_True    1
 40 | %define CON_Unit    2
 41 | %define CON_Nil     3
 42 | %define CON_Cons    4
 43 | %define CON_Nothing 5
 44 | %define CON_Just    6
 45 | 
 46 | %define HEAP_PTR       r15
 47 | %define STACK_PTR      rbp
 48 | %define STACK_PTR_COPY r14
 49 | %define ARG0  rax
 50 | %define ARG1  rcx
 51 | %define ARG2  rdx
 52 | 
 53 | %define DE_BRUIJN(idx)        qword [ STACK_PTR      - 8*(idx+1) ]
 54 | %define DE_BRUIJN_COPY(idx)   qword [ STACK_PTR_COPY - 8*(idx+1) ]
 55 | %macro PUSH 1
 56 |   mov [STACK_PTR],%1
 57 |   add STACK_PTR,8
 58 | %endmacro
 59 | %macro POP 1
 60 |   sub STACK_PTR,8
 61 |   mov %1,[STACK_PTR]
 62 | %endmacro
 63 | 
 64 | %define INT_LITERAL(i)   (i<<3) + 1
 65 | %define CHR_LITERAL(c)   INT_LITERAL(c)
 66 | %define NULLARY_CON(con) (con<<3) + 4
 67 | 
 68 | %define IS_HEAP_PTR  test al,7
 69 | 
 70 | %define FROM_INT_LITERAL      sar rax,3
 71 | %define FROM_INT_LITERAL_ARG0 sar ARG0,3
 72 | %define FROM_INT_LITERAL_ARG1 sar ARG1,3
 73 | %macro  FROM_INT_LITERAL_ARGS01 0
 74 |   FROM_INT_LITERAL_ARG0
 75 |   FROM_INT_LITERAL_ARG1
 76 | %endmacro
 77 | %macro TO_INT_LITERAL 0
 78 |   shl rax,3
 79 |   or al,1
 80 | %endmacro
 81 | 
 82 | %define FROM_BOOL_CON      shr rax,3
 83 | %define FROM_BOOL_CON_ARG0 shr ARG0,3
 84 | %define FROM_BOOL_CON_ARG1 shr ARG1,3
 85 | %macro  FROM_BOOL_CON_ARGS01 0
 86 |   FROM_BOOL_CON_ARG0
 87 |   FROM_BOOL_CON_ARG1
 88 | %endmacro
 89 | %macro TO_BOOL_CON 0
 90 |   shl rax,3
 91 |   or al,2
 92 | %endmacro
 93 |  
 94 | %define FROM_STATIC_FUNPTR    and al,0xf8
 95 | %define TO_STATIC_FUNPTR      or  al,2
 96 | 
 97 | %define TAG_DATACON(con_tag ,con_arity)  (con_arity<<16) + (con_tag   << 3) + 4
 98 | %define TAG_CLOSURE(env_size,rem_arity)  (env_size <<16) + (rem_arity << 3) + 0
 99 | 
100 | ; args: size in words
101 | %macro ALLOCATE_HEAP 1
102 |   mov rax,HEAP_PTR
103 |   add HEAP_PTR , (8 * %1)
104 | %endmacro
105 | 
106 | %macro DYN_ALLOCATE_HEAP 1
107 |   mov rax,%1
108 |   shl rax,3
109 |   add rax,HEAP_PTR
110 |   xchg rax,HEAP_PTR
111 | %endmacro
112 | 
113 | ; args: static label, env size, rem_arity
114 | %macro ALLOCATE_CLOSURE 3
115 |   ALLOCATE_HEAP (%2 + 2)  
116 |   mov rdx,TAG_CLOSURE(%2,%3)
117 |   mov [rax],rdx
118 |   mov rdx,%1
119 |   mov [rax+8],rdx
120 | %endmacro
121 | 
122 | %macro DYN_ALLOCATE_CLOSURE 3
123 |   mov rax,%1
124 |   add rax,2
125 |   shl rax,3
126 |   add rax,HEAP_PTR
127 |   xchg rax,HEAP_PTR
128 |   mov rdx,%2  ; env size
129 |   shl rdx,13
130 |   add rdx,%3  ; rem arity
131 |   shl rdx,3
132 |   mov [rax],rdx
133 |   mov rdx,%1
134 |   mov [rax+8],rdx
135 | %endmacro
136 | 
137 | ; args: con_tag, con_arity
138 | %macro ALLOCATE_DATACON 2
139 |   ALLOCATE_HEAP (%2 + 1)  
140 |   mov rdx,TAG_DATACON(%1,%2)
141 |   mov [rax],rdx
142 | %endmacro
143 | 
144 | ; args: con_tag, con_arity
145 | %macro DYN_ALLOCATE_DATACON 2
146 |   mov rax,%1
147 |   inc rax
148 |   shl rax,3
149 |   add rax,HEAP_PTR
150 |   xchg rax,HEAP_PTR
151 |   mov rdx,%2  ; con arity
152 |   shl rdx,13
153 |   add rdx,%1  ; con tag
154 |   shl rdx,3
155 |   mov [rax],rdx
156 | %endmacro
157 | 
158 | ;-------------------------------------------------------------------------------
159 | 
160 |   . "; evaluate a thunk on RAX"
161 | rts_eval_thunk:
162 | IS_HEAP_PTR
163 | jne .singleword
164 | mov rdx,[rax]
165 | test dx,dx       ; thunk = low 3 bits is zero and the next 13 bits too
166 | jnz .evaluated
167 | .thunk:
168 | push rsi
169 | push rdi
170 | mov rsi,rax
171 | mov rcx,[rsi]
172 | shr rcx,16       ; size of the environment
173 | push rcx
174 | lea rdi,[rsi+16]
175 | .push_loop:
176 | mov rax,[rdi]
177 | PUSH rax
178 | add rdi,8
179 | loop .push_loop
180 | call [rsi+8]
181 | pop rcx
182 | sub STACK_PTR,rcx
183 | pop rdi
184 | pop rsi
185 | retn
186 | .singleword:
187 | mov dl,al
188 | and dl,7
189 | cmp dl,2
190 | je .static_funptr
191 | .evaluated:
192 | retn
193 | .static_funptr:
194 | mov rbx,rax
195 | and bl,0xf8
196 | mov rcx,[rbx-8]    ; env size / arity
197 | or rcx,rcx         ; both is zero?
198 | jnz .evaluated
199 | call rbx           ; example: CAF like a string. No environment, no arguments
200 | retn 
201 | 
202 | ;---------------------------------------
203 | 
204 | ; apply to an unknown function. RAX = heap ptr to function
205 | ; RCX = number of arguments on the stack (assumed to be positive!)
206 | rts_apply:
207 | IS_HEAP_PTR
208 | jne .single_word  ; either a literal or a static function ptr
209 | mov rdx,[rax]     ; tag word
210 | test dl,7
211 | jz .closure
212 | 
213 | .single_word:
214 | mov dl,al
215 | and dl,7
216 | cmp dl,2
217 | jne .app_to_nonlambda
218 | .static_funptr:
219 | ; TODO: maybe jump to somewhere in the middle of .closure to solve this?
220 | 
221 | ; fatal error: application to non-lambda and non-constructor
222 | .app_to_nonlambda:
223 | mov rsi,errmsg_app
224 | mov rdx,errmsg_app.len
225 | call rts_local_stderr_string
226 | mov rax,666
227 | jmp rts_exit
228 | 
229 | .datacon:
230 | mov rsi,rax
231 | mov r8,rdx
232 | mov r9,rdx
233 | shr r8,16
234 | and r8,0xffff    ; r8 = con tag
235 | shr r9,3
236 | and r9,0x1fff    ; r9 = old arity
237 | mov r10,r9
238 | add r10,rcx      ; r10 = new arity
239 | DYN_ALLOCATE_DATACON r8,r10
240 | mov rdi,rax
241 | push rcx
242 | mov rcx,r9
243 | mov rbx,8
244 | .copy_old_args:
245 | mov rax,[rsi+rbx]
246 | mov [rdi+rbx],rax
247 | add rbx,8
248 | .loop copy_old_args
249 | pop rcx
250 | .copy_new_args:
251 | mov rax,[STACK_PTR-8*rcx+8]
252 | mov [rdi+rbx],rax
253 | add rbx,8
254 | .loop copy_new_args
255 | mov rax,rdi
256 | retn
257 | 
258 | .closure:
259 | mov r8,rdx
260 | mov r9,rdx
261 | shr r8,16
262 | and r8,0xffff    ; r8 = env size
263 | shr r9,3        
264 | and r9,0x1fff    ; r9 = arity
265 | cmp rcx,r9       ; actual args vs arity
266 | je  .equal
267 | jl  .less
268 | jmp .greater
269 | 
270 | ; TODO: push the environment then push the arguments then call (callee frees)
271 | .equal:
272 | push rax
273 | or r8,r8
274 | jz .do_not_insert_env
275 | mov rsi,STACK_PTR
276 | lea rdi,[STACK_PTR + r8 * 8]
277 | .move_args_loop:
278 | sub rsi,8
279 | sub rdi,8
280 | mov rax,[rsi]
281 | mov [rdi],rax
282 | loop .move_args_loop:
283 | mov rcx,r8
284 | mov rdi,rsi
285 | pop rsi
286 | push rsi
287 | add rsi,8
288 | rep movsq
289 | .do_not_insert_env:
290 | pop rax
291 | jmp rax
292 | 
293 | ; TODO: allocate closure, copy the environment then copy the arguments
294 | .less:
295 | 
296 | ; TODO: push the environment then push the necessary arguments then call;
297 | ;       then recursively call rts_apply for the remaining args
298 | .greater:
299 | 
300 | 
301 | 


--------------------------------------------------------------------------------
/asm_shit/sys_macos.asm:
--------------------------------------------------------------------------------
 1 | 
 2 | ; MacOS syscalls
 3 | 
 4 | rts_exit: 
 5 | FROM_INT_LITERAL 
 6 | mov rdi,rax 
 7 | mov rax, 0x2000001   ; exit 
 8 | syscall 
 9 | retn 
10 | 
11 | rts_putchar_stderr: 
12 | mov rdi,2   ; stderr = 2 
13 | jmp rts_local_hputchar_literal
14 | 
15 | rts_putchar: 
16 | mov rdi,1   ; stdout = 1 
17 | ; jmp rts_local_hputchar_literal
18 | 
19 | rts_local_hputchar_literal: 
20 | FROM_INT_LITERAL   ; FROM_CHR_LITERAL 
21 | rts_local_hputchar: 
22 | push rax 
23 | mov rsi,rsp  
24 | mov rdx,1   ; length = 1 character  
25 | mov rax,0x2000004   ; write  
26 | syscall 
27 | pop rax 
28 | mov rax,NULLARY_CON(CON_Unit)   ; Unit 
29 | retn
30 | 
31 | rts_getchar: 
32 | mov rdi,0   ; stdin = 0
33 | call rts_local_hgetchar
34 | test rax,rax
35 | js .error
36 | TO_INT_LITERAL   ; TO_CHR_LITERAL
37 | mov rdx,rax
38 | ALLOCATE_DATACON CON_Just,1   ; Just
39 | mov [rax+8],rdx
40 | retn
41 | .error:
42 | mov rax,NULLARY_CON(CON_Nothing)   ; Nothing 
43 | retn
44 | 
45 | ; returns a character code or -1 if error
46 | rts_local_hgetchar: 
47 | push rax    ; alloca 8 bytes
48 | mov rsi,rsp ; buffer 
49 | mov rdx,1   ; length = 1 character  
50 | mov rax,0x2000003   ; read  
51 | syscall
52 | or rax,rax     ; bytes read (0 or 1)
53 | jnz .succeed
54 | pop rdx
55 | xor rax,rax
56 | dec rax        ; -1 = error
57 | retn
58 | .succeed:
59 | pop rax 
60 | retn
61 | 
62 | ; rsi = string ; rdx = length of the string
63 | rts_local_stderr_string:
64 | mov rdi,2   ; stderr = 2 
65 | mov rax,0x2000004   ; write  
66 | syscall 
67 | retn
68 | 
69 | rts_error: 
70 | IS_HEAP_PTR
71 | jnz .exit
72 | cmp rax,TAG_DATACON(CON_Cons,2)   ; Cons cell
73 | jnz .exit
74 | push rax
75 | mov rax,rax[8]
76 | call rts_putchar_stderr
77 | pop rax
78 | mov rax,rax[16]
79 | jmp rts_error
80 | .exit:
81 | mov rdi,666          ; exit code 
82 | mov rax, 0x2000001   ; exit 
83 | syscall 
84 | retn 
85 | 


--------------------------------------------------------------------------------
/bootstrap.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | rm nanohs_via_ghc.exe     2>/dev/null
 4 | rm nanohs_via_ghc_00.exe  2>/dev/null
 5 | rm nanohs_via_ghc_01.exe  2>/dev/null
 6 | rm nanohs_stage1.c        2>/dev/null
 7 | rm nanohs_stage2.c        2>/dev/null
 8 | rm nanohs_stage3.c        2>/dev/null
 9 | rm nanohs_stage1.exe      2>/dev/null  
10 | rm nanohs_stage2.exe      2>/dev/null  
11 | 
12 | echo "" ; echo "==================="
13 | echo "compiling a bootstrap (stage #0) compiler via GHC"
14 | ghc -O0 --make -main-is Nano.main Nano.hs -o nanohs_via_ghc_O0.exe
15 | #ghc -O1 --make -main-is Nano.main Nano.hs -o nanohs_via_ghc_O1.exe
16 | 
17 | rm *.o
18 | rm *.hi
19 | 
20 | echo "" ; echo "==================="
21 | echo "compiling a stage #1 (unoptimized) compiler via the bootstrapped one (stage #0)"
22 | ./nanohs_via_ghc_O0.exe -c Nano.hs nanohs_stage1.c 
23 | echo "running gcc on stage #1 C source..."
24 | gcc -O3 -Wl,-stack_size -Wl,0x3000000 nanohs_stage1.c -o nanohs_stage1.exe
25 | 
26 | echo "" ; echo "==================="
27 | echo "compiling a stage #2 (optimized) compiler via stage #1"
28 | ./nanohs_stage1.exe -o Nano.hs nanohs_stage2.c 
29 | echo "running gcc on stage #2 C source..."
30 | gcc -O3 -Wl,-stack_size -Wl,0x3000000 nanohs_stage2.c -o nanohs_stage2.exe
31 | 
32 | echo "" ; echo "==================="
33 | echo "compiling a stage #3 (optimized) compiler via stage #2"
34 | ./nanohs_stage2.exe -o Nano.hs nanohs_stage3.c 
35 |  
36 | echo "" ; echo "==================="
37 | echo "comparing the stage #2 and stage #3 outputs:"
38 | DIFF=`diff -q nanohs_stage2.c nanohs_stage3.c`
39 | if [[ ! -z "$DIFF" ]]
40 | then
41 |   echo $DIFF
42 | else
43 |   echo OK.
44 | fi
45 | 


--------------------------------------------------------------------------------
/examples/church.nano:
--------------------------------------------------------------------------------
 1 | 
 2 | {-# LANGUAGE NoImplicitPrelude, Rank2Types, TypeApplications #-}
 3 | 
 4 | import PrimGHC
 5 | import Prelude ( Int , undefined )
 6 | import Base
 7 | 
 8 | {-% include "Base.hs" %-}
 9 | 
10 | main = runIO action
11 | 
12 | action = iosequence_
13 |   [ putStrLn "church number example"
14 |   , print (natToInt (pow ten (intToNat 4)))
15 |   ]
16 | 
17 | data Nat = Nat (forall a. a -> (a -> a) -> a)
18 | 
19 | elim :: Nat -> a -> (a -> a) -> a
20 | elim nat z s = case nat of { Nat n -> n z s }
21 |  
22 | natToInt :: Nat -> Int
23 | natToInt n = elim n 0 inc
24 | 
25 | intToNat :: Int -> Nat
26 | intToNat n = ifte (eq n 0) zero (succ (intToNat (dec n)))
27 | 
28 | zero :: Nat 
29 | zero = Nat (\z s -> z)
30 | 
31 | succ :: Nat -> Nat 
32 | succ n = Nat (\z s -> s (elim n z s))
33 | 
34 | one   = Nat (\z s -> s (elim zero  z s)) 
35 | two   = Nat (\z s -> s (elim one   z s))
36 | three = Nat (\z s -> s (elim two   z s))
37 | four  = Nat (\z s -> s (elim three z s))
38 | five  = Nat (\z s -> s (elim four  z s))
39 | six   = Nat (\z s -> s (elim five  z s))
40 | 
41 | ten      = add five five
42 | hundred  = mul ten ten
43 | thousand = mul hundred ten
44 | 
45 | add :: Nat -> Nat -> Nat
46 | add n m = Nat (\z s -> elim m (elim n z s) s)
47 | 
48 | mul :: Nat -> Nat -> Nat
49 | mul n m = elim n zero (add m)
50 | 
51 | pow :: Nat -> Nat -> Nat
52 | pow n e = elim e one (mul n)
53 | 
54 | 


--------------------------------------------------------------------------------
/examples/test.nano:
--------------------------------------------------------------------------------
 1 | 
 2 | -- fun1 x y = plus  x y
 3 | -- fun2 a b = minus a b
 4 | 
 5 | inc x = plus  x 1
 6 | dec x = minus x 1
 7 | 
 8 | neq x y = not (eq x y)
 9 | 
10 | gt x y = lt y x
11 | 
12 | ge x y = le y x
13 | 
14 | compare x y = ifte (lt x y) LT (ifte (eq x y) EQ GT)
15 | 
16 | foldl f x0 list = go x0 list where
17 |   { go x ls = case ls of { Nil -> x ; Cons y ys -> go (f x y) ys }
18 |   }
19 | 
20 | sum = foldl plus 0
21 | 
22 | range         = rangeFrom 0
23 | rangeFrom k n = ifte (gt n 0) (Cons k (rangeFrom (inc k) (dec n))) Nil
24 | 
25 | range'     = rangeFrom' 0
26 | rangeFrom' = worker where { 
27 |   worker k n = ifte (gt n 0) (Cons k (worker (inc k) (dec n))) Nil }
28 | 
29 | reverse = foldl (\xs x -> Cons x xs) Nil
30 | 
31 | showNat n = ifte (lt n 0) (error "showNat: negative") (worker n) where
32 |   { worker n = ifte (eq n 0) "0" (reverse (go n))
33 |   ; go     n = ifte (eq n 0) Nil (Cons (chr (plus (mod n 10) 48)) (go (div n 10)))
34 |   }
35 | 
36 | main = Pair 
37 |   (showNat (sum (range' 101)))
38 |   (sum (rangeFrom 100 7)) 
39 | 
40 | 


--------------------------------------------------------------------------------
/examples/test5.nano:
--------------------------------------------------------------------------------
 1 | 
 2 | {-% include "Base.hs" %-}
 3 | 
 4 | main = runIO (ioseq action6 (ioseq sep (ioseq action7 action_debug)))
 5 | 
 6 | action_debug = iosequence_
 7 |   [ sep
 8 |   , putStrLn "some construcor names:" 
 9 |   , print (Triple 101 202 303)
10 |   , print (SomeConstructor "hello" 666)
11 |   ]
12 | 
13 | -- big_action = putStrLn (showInt (length (big2 10 10 100)))
14 | 
15 | big0     n = concat (replicate n "0123456789" )
16 | big1 m   n = concat (replicate m (big0 n)  )
17 | big2 k m n = concat (replicate k (big1 m n))
18 | 
19 | sep = putStrLn (replicate 20 '=')
20 | 
21 | action7 = iobind (readFile "a.txt") (\text -> ioseq
22 |   (putStrLn "contents of the file `a.txt`:") (putStrLn (unlines (map ff (lines text)))))
23 | 
24 | myPrintJust mb = case mb of { Just n -> putStrLn (append "fuck -> " (showInt n)) }
25 | 
26 | text  = unlines [ "alma" , "korte" , "szilva" ]
27 | text2 = unlines (map ff (lines text))
28 | ff x  = append ">>> " x
29 | 
30 | action6 = iobind getArgs (\list -> iosequence_
31 |   [ putStrLn (append "argc = " (showInt (length list)))
32 |   , iomapM_ putStrLn list
33 |   ])
34 | 
35 | 


--------------------------------------------------------------------------------
/main.hs:
--------------------------------------------------------------------------------
1 | 
2 | module Main where
3 | 
4 | import qualified Nano
5 | 
6 | main = Nano.main
7 | 
8 | 


--------------------------------------------------------------------------------
/nanohs.cabal:
--------------------------------------------------------------------------------
 1 | Cabal-Version:       2.4
 2 | Name:                nanohs
 3 | Version:             0.5
 4 | Synopsis:            nanohs - a self-hosting lambda calculus compiler
 5 | Description:         A self-hosting lambda calculus compiler. 
 6 |                      See the README fore more information.
 7 |                      
 8 | License:             BSD-3-Clause
 9 | Author:              Balazs Komuves
10 | Copyright:           (c) 2021 Balazs Komuves
11 | Maintainer:          bkomuves+hackage@gmail.com
12 | Homepage:            https://github.com/bkomuves/nanohs
13 | Stability:           Experimental
14 | Category:            Language
15 | Tested-With:         GHC == 8.6.5
16 | Build-Type:          Simple
17 | 
18 | Source-repository head
19 |   type:                git
20 |   location:            https://github.com/bkomuves/nanohs
21 | 
22 | --------------------------------------------------------------------------------
23 | 
24 | library 
25 | 
26 |   Build-Depends:       base >= 4 && < 5 
27 |   Hs-Source-Dirs:      .
28 | 
29 |   exposed-modules:     Base
30 |                        Closure
31 |                        CodeGen
32 |                        Containers
33 |                        Core
34 |                        DataCon
35 |                        Dependency
36 |                        Eval
37 |                        Inliner
38 |                        Nano
39 |                        Parser
40 |                        PrimGHC
41 |                        PrimOps
42 |                        ScopeCheck
43 |                        Syntax
44 |                        Types
45 | 
46 |   Default-Language:    Haskell2010
47 |   Default-extensions:  Strict, NoImplicitPrelude, FlexibleInstances, FlexibleContexts
48 |   Other-extensions:    CPP
49 | 
50 | --------------------------------------------------------------------------------
51 | 
52 | executable nanohs_bootstrap.exe
53 | 
54 |   Build-Depends:       base >= 4 && < 5, nanohs >= 0.5
55 |   main-is:             main.hs
56 |   Default-Language:    Haskell2010
57 |   Hs-Source-Dirs:      .
58 | 
59 | --------------------------------------------------------------------------------
60 | 


--------------------------------------------------------------------------------
/slides/.gitignore:
--------------------------------------------------------------------------------
1 | *.aux
2 | *.log
3 | *.nav
4 | *.out
5 | *.snm
6 | *.toc
7 | *.vrb
8 | 


--------------------------------------------------------------------------------
/slides/nanohs_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bkomuves/nanohs/dd052656ba72934e3be13f9968ef280565e7b03c/slides/nanohs_slides.pdf


--------------------------------------------------------------------------------
/slides/nanohs_slides.tex:
--------------------------------------------------------------------------------
  1 | 
  2 | \documentclass{beamer}
  3 | \mode<presentation>
  4 | \usepackage{color}
  5 | \usepackage{fancyvrb}
  6 | \usefonttheme[onlymath]{serif}
  7 | 
  8 | \title{Writing a self-hosting compiler \\ for a purely functional language\\
  9 | {\footnotesize (work in progress)}}
 10 | \author{Bal\'azs K\H{o}m\H{u}ves \\ {\small [{\tt bkomuves@gmail.com}]}}
 11 | \date{2021.03.24.}
 12 | 
 13 | %------------------------------------------------------------------------------%
 14 | 
 15 | \begin{document}
 16 | 
 17 | \begin{frame}
 18 | \titlepage
 19 | \end{frame}
 20 | 
 21 | %------------------------------------------------------------------------------%
 22 | 
 23 | \begin{frame}{What is this?}
 24 | 
 25 | This is an {\bf experiment} to write a minimalistic self-hosting compiler
 26 | in a purely functional language. It's intended both as a learning
 27 | experience (for myself) and an experiment in bootstrapping.\\[15pt]
 28 | 
 29 | Goals:
 30 | \begin{itemize}
 31 | \item self-hosting compiler
 32 | \item very small code-base
 33 | \item bootstrappable using a Haskell implementation
 34 | \item generated code should have ``acceptable'' quality \\[15pt]
 35 | \end{itemize}
 36 | 
 37 | Non-goals:
 38 | \begin{itemize}
 39 | \item a fully-featured language
 40 | \item efficient compilation (the compiler can be slow)
 41 | \item bootstrapping ``real'' compilers from this code base
 42 | \end{itemize}
 43 | 
 44 | \end{frame}
 45 | 
 46 | %------------------------------------------------------------------------------%
 47 | 
 48 | \begin{frame}{Why self-host?}
 49 | 
 50 | Why do bootstrapping / self-hosting?\\[10pt]
 51 | 
 52 | \begin{itemize}
 53 | \item I like simple, self-contained stuff
 54 | \item self-hosting is a philosophically desirable property
 55 | \item future-proofness: If you can bootstrap from something simple, it won't bit-rot that easily
 56 | \item it's a test of the maturity of the compiler
 57 | \item the compiler itself is a very good test program! \\[15pt]
 58 | \end{itemize}
 59 | 
 60 | Exhibit A: Maru, a tiny self-hosting LISP dialect was successfully revived and
 61 | extended by Attila.\\[15pt]
 62 | 
 63 | Exhibit B: The York Haskell Compiler (forked from {\tt nhc98}) is now dead, apparently
 64 | because \emph{nobody knows how to build it anymore}.
 65 | 
 66 | \end{frame}
 67 | 
 68 | %------------------------------------------------------------------------------%
 69 | 
 70 | \begin{frame}{The language}
 71 | 
 72 | The syntax and semantics is based on (strict) Haskell - the idea is that the compiler
 73 | should be compilable by both GHC and itself.
 74 | 
 75 | This makes development much easier!\\[20pt]
 76 | 
 77 | The language is basically untyped call-by-value lambda calculus with data constructors
 78 | and recursive lets.
 79 | During parsing, Haskell features like type annotations, data type declarations, imports etc
 80 | are simply ignored - but the compiler itself is a well-typed program!\\[20pt]
 81 | 
 82 | In fact, a type checker should be probably added, after all; but I wanted to keep it simple.
 83 | 
 84 | \end{frame}
 85 | 
 86 | %------------------------------------------------------------------------------%
 87 | 
 88 | \begin{frame}{Language (non-)features}
 89 | 
 90 | {\small
 91 | \begin{itemize}
 92 | \item no static type system (untyped lambda calculus) 
 93 | \item na data type declarations (constructors are arbitrary capitalized names)
 94 | \item no module system - instead, we have C-style includes
 95 | \item strict language (exceptions: if-then-else, logical {\tt and}/{\tt or})
 96 | \item ML-style side effects, but wrapped in a monad
 97 | \item only simple pattern matching + default branch (TODO: nested patterns)
 98 | \item no infix operators
 99 | \item list construction syntax {\tt [a,b,c]} is supported
100 | \item no indentation syntax (only curly braces), except for top-level blocks
101 | \item only line comments, starting at the beginning of the line
102 | \item no escaping in character / string constants yet
103 | \item (universal polymorphic equality comparison primop)
104 | \end{itemize}
105 | }
106 | 
107 | \end{frame}
108 | 
109 | %------------------------------------------------------------------------------%
110 | 
111 | \begin{frame}{Compilation pipeline}
112 | 
113 | \begin{enumerate}
114 | \item lexing and parsing
115 | \item collecting string literals
116 | \item partition recursive lets into strongly connected components
117 | \item TODO: eliminate pattern matching into simple branching on constructors
118 | \item recognize primops
119 | \item collect data constructors
120 | \item scope checking \& conversion to core language
121 | \item inline small functions + some basic optimizations
122 | \item closure conversion
123 | \item TODO: compile to a low-level intermediate language
124 | \item final code generation 
125 | \end{enumerate}
126 | 
127 | \end{frame}
128 | 
129 | %------------------------------------------------------------------------------%
130 | 
131 | \begin{frame}{The runtime system}
132 | 
133 | The runtime system is relatively straightforward:
134 | 
135 | {\small
136 | \begin{itemize}
137 | \item there is a heap, which can only contain closures and data constructors
138 | \item there is stack (separate from the C stack) which contains pointers to the heap
139 | \item heap pointers are tagged using the lowest 3 bits
140 | \item heap objects fitting into 61 bits (ints, chars, nullary constructors, static functions) are
141 |       not allocated, just stored in registers / on stack
142 | \item statically unknown application is implemented as a runtime primitive 
143 | \item there is a simple copying GC - the GC roots are simply the content of the stack
144 |       (plus statically known data)
145 | \item there are some debug features like printing heap objects
146 | \item primops: integer arithmetic; integer comparison; bitwise operations; lazy {\tt ifte/and/or}; basic IO.
147 | \end{itemize}
148 | }
149 | 
150 | \end{frame}
151 | 
152 | %------------------------------------------------------------------------------%
153 | 
154 | \begin{frame}{Current state of the experiment}
155 | 
156 | Repo: \url{https://github.com/bkomuves/nanohs}\\[15pt]
157 | 
158 | Current state:
159 | \begin{itemize}
160 | \item it can compile itself successfully;
161 | \item the source code is not as nice as it could be;
162 | \item the generated code is pretty horrible;
163 | \item there are some desirable features missing.\\[15pt]
164 | \end{itemize}
165 | 
166 | Code size:
167 | \begin{itemize}
168 | \item the compiler: $\sim 1900$ lines (required code)
169 | \item type annotations: $+525$ lines (ignored)
170 | \item the C runtime: $\sim 600$ lines (including some debug features)
171 | \end{itemize}
172 | 
173 | \end{frame}
174 | 
175 | %------------------------------------------------------------------------------%
176 | 
177 | \begin{frame}{Mistakes I made}
178 | 
179 | {\small
180 | I made a huge amount of mistakes during the development, which made it much
181 | harder and longer than I expected.
182 | 
183 | \begin{itemize}
184 | \item Trying to self-host before the compiler actually works. Writing a 
185 | compiler is hard work, you don't want to do it in a limited language!
186 | \item Not figuring out the precise semantics upfront:
187 |    \begin{itemize}
188 |    \item exact details of lazyness vs. strictness
189 |    \item controlling exactly where heap / stack allocations can happen
190 |    \item recursive lets 
191 |    \end{itemize}
192 | \item Not having debugging support from early on (for example: good error messages, 
193 |       threading names \& source locations, RTS features, pretty printing AST-s)
194 | \item Trying to target assembly first, instead of a simpler target like C; it's just extra cognitive load
195 | \item Generating code directly, without having a low-level IL
196 | \item Using de Bruijn indices (as opposed to levels) in the generated code
197 | \end{itemize}
198 | }
199 | 
200 | \end{frame}
201 | 
202 | %------------------------------------------------------------------------------%
203 | 
204 | \begin{frame}{Bug gallery}
205 | 
206 | {\small
207 | Some examples of bugs which were really painful to debug:
208 | 
209 | \begin{itemize}
210 | \item IO ordering bugs (GHC \emph{really} likes to optimize my IO implementations away...)
211 | \item GC liveness bugs (see the next slide)
212 | \item stack management, and using de Bruijn indices in the generated code
213 | \item C stack overflow gives you a segfault with no information
214 | \item thunks (there shouldn't be any, this is supposed to be a strict language after all...)
215 | \item recursive lets are a PITA if you allow non-lambdas
216 | \item there was a very simple bug (wrong variable name) in the implementation of generic
217 | application in the RTS which was not triggered by compiling the whole compiler, only
218 | after I did inlining
219 | \item inlining code which does computation makes the runtime blow up exponentially,
220 |       which looks \emph{very} similar to an infinite loop...
221 | \end{itemize}
222 | }
223 | 
224 | \end{frame}
225 | 
226 | %------------------------------------------------------------------------------%
227 | 
228 | \begin{frame}[fragile]{Example GC bug - Marshalling from C strings}
229 | 
230 | {\footnotesize
231 | \begin{verbatim}
232 |        heap_obj marshal_from_cstring(char *cstring) { ...
233 |        
234 | v1:    obj    = heap_allocate(Cons,2);
235 |        obj[1] = cstring[0];
236 |        obj[2] = marshal_from_cstring(cstring+1);
237 |        return obj;
238 |        
239 | v2:    rest   = marshal_from_cstring(cstring+1);
240 |        obj    = heap_allocate(Cons,2);
241 |        obj[1] = cstring[0];
242 |        obj[2] = rest;
243 |        return obj;
244 |        
245 | v3:    rest   = marshal_from_cstring(cstring+1);
246 |                 push(rest);
247 |        obj    = heap_allocate(Cons,2);
248 |        obj[1] = cstring[0];
249 |        obj[2] = pop();
250 |        return obj;
251 | \end{verbatim}
252 | }
253 | 
254 | \end{frame}
255 | 
256 | %------------------------------------------------------------------------------%
257 | 
258 | \begin{frame}{Future work}
259 | 
260 | I'm not convinced that it is worth hacking on this toy compiler (as opposed to work
261 | on a full-featured ``real'' compiler), but in any case, here is a TODO list:\\[10pt]
262 | 
263 | \begin{itemize}
264 | \item add nested patterns, and patterns in lambda arguments
265 | \item do some optimizations, so that the generated code is better
266 | \item tail call elimination
267 | \item escaping in string constants
268 | \item compile to a low-level intermediate language first, and do codegen from that
269 | \item add a type system
270 | \item more backends: x86-64 assembly and/or machine code; some simple VM
271 | \end{itemize}
272 | 
273 | \end{frame}
274 | 
275 | 
276 | %------------------------------------------------------------------------------%
277 | 
278 | \end{document}
279 | 


--------------------------------------------------------------------------------
/sloc_count.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | SRC="Base.hs Closure.hs CodeGen.hs Containers.hs Core.hs DataCon.hs Dependency.hs Eval.hs Inliner.hs Nano.hs Parser.hs PrimOps.hs ScopeCheck.hs Syntax.hs Types.hs"
 4 | 
 5 | FULL=`cat                $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'` 
 6 | ANNT=`grep "::"          $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'`
 7 | TYPE=`grep "^type "      $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'`
 8 | DATA=`grep "^data "      $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'`
 9 | DCON=`grep "^  |"        $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'`
10 | DERV=`grep "^  deriving" $SRC | wc -l | sed -E 's/[ ]*([1-9]+)/\1/g'`
11 | 
12 | HASKELL_CLOC=`cloc --quiet --csv $SRC | grep Haskell`
13 | ASM_CLOC=`cloc --quiet --csv rts.asm sys_macos.asm | grep Assembly`
14 | C_CLOC=`cloc --quiet --csv rts.c | grep C`
15 | 
16 | IFS=',' #setting comma as delimiter  
17 | read -a arr <<< "$HASKELL_CLOC" 
18 | BLNK=${arr[2]}
19 | CMMT=${arr[3]}
20 | HSLN=${arr[4]}
21 | 
22 | read -a arr <<< "$ASM_CLOC" 
23 | ASML=${arr[4]}
24 | 
25 | read -a arr <<< "$C_CLOC" 
26 | CRTS=${arr[4]}
27 | 
28 | echo "raw lines         = $FULL"
29 | echo "blank             = $BLNK"
30 | echo "comment           = $CMMT"
31 | echo "haskell lines     = $HSLN"
32 | echo "type annotations  = $ANNT"
33 | echo "type aliases      = $TYPE"
34 | echo "data declarations = $DATA"
35 | echo "data constructors = $DCON"
36 | echo "deriving          = $DERV"
37 | 
38 | IGNORED=`expr $ANNT + $TYPE + $DATA + $DCON + $DERV`
39 | ESSENTIAL=`expr $HSLN - $IGNORED`
40 | 
41 | echo "============================"
42 | echo "haskell lines     = $HSLN"
43 | echo "from this ignored = $IGNORED"
44 | echo "essential lines   = $ESSENTIAL"
45 | echo "C runtime         = $CRTS"
46 | echo "assembly runtime  = $ASML"
47 | 


--------------------------------------------------------------------------------
/topsort/TopSortSCC.hs:
--------------------------------------------------------------------------------
  1 | 
  2 | {-# LANGUAGE Strict, FlexibleContexts, PatternSynonyms #-}
  3 | module TopSortSCC where
  4 | 
  5 | import Control.Monad
  6 | import Control.Monad.State.Strict
  7 | 
  8 | import qualified Data.Map.Strict as Map ; import Data.Map.Strict (Map)
  9 | import qualified Data.Set        as Set ; import Data.Set        (Set)
 10 | 
 11 | import Data.List
 12 | import System.Random
 13 | 
 14 | --------------------------------------------------------------------------------
 15 | 
 16 | ex = Map.fromList exlist
 17 | 
 18 | exlist =
 19 |   [ ( "a" , [ "a" , "b" ]            )
 20 |   , ( "b" , [ "c" , "d" , "e" , "j"] )
 21 |   , ( "c" , [ "c" ]                  )
 22 |   , ( "d" , []                       )
 23 |   , ( "e" , [ "d" ]                  )
 24 |   , ( "f" , [ "g" ]                  )
 25 |   , ( "g" , [ "h" , "i" ]            )
 26 |   , ( "h" , [ "f" , "j" ]            )
 27 |   , ( "i" , []                       )
 28 |   , ( "j" , [ "k" , "l" ]            )
 29 |   , ( "l" , [ "l" ]                  )
 30 |   , ( "k" , []                       )         
 31 |   ]
 32 | 
 33 | pattern Pair x y = (x,y)
 34 | ex2 = Map.fromList [Pair "foldl" [],Pair "main" ["foldl"],Pair "map" []]
 35 | 
 36 | --------------------------------------------------------------------------------
 37 | 
 38 | type Vtx   = String
 39 | type Graph = Map Vtx [Vtx]
 40 | 
 41 | graphToList :: Graph -> [(Vtx,Vtx)]
 42 | graphToList g = [ (v,w) | (v,ws) <- Map.toList g , w <- ws ]
 43 | 
 44 | graphFromList :: [(Vtx,Vtx)] -> Graph 
 45 | graphFromList = buildMap (:[]) (:) 
 46 | 
 47 | flipGraph :: Graph -> Graph
 48 | flipGraph graph = insertKeys $ graphFromList $ map swap $ graphToList graph where
 49 |   swap (x,y) = (y,x)
 50 |   keys = Map.keys graph
 51 |   insertKeys g = foldl (\old k -> Map.alter h k old) g keys
 52 |   h Nothing   = Just []
 53 |   h (Just ys) = Just ys
 54 | 
 55 | buildMap :: Ord k => (a -> b) -> (a -> b -> b) -> [(k,a)] -> Map k b
 56 | buildMap f g xys = foldl' insert Map.empty xys where
 57 |   insert old (k,x) = Map.alter (h x) k old
 58 |   -- h :: a -> Maybe b -> Maybe b
 59 |   h x Nothing  = Just (f x  )
 60 |   h x (Just y) = Just (g x y)
 61 | 
 62 | randomGraph :: Int -> Double -> IO Graph
 63 | randomGraph n threshold = 
 64 |   do
 65 |     zs <- replicateM n2 $ randomRIO (0,1)
 66 |     return $ graphFromList [ (f i, f j) | (z,(i,j)) <- zip zs edges , z < threshold ]
 67 |   where
 68 |     n2 = n*n
 69 |     edges = [ (i,j) | i<-[1..n], j<-[1..n] ]
 70 |     f i = "v" ++ show i
 71 | 
 72 | test = do
 73 |   forM_ [0.05,0.10..0.95] $ \threshold -> do
 74 |     putStrLn $ "threshold = " ++ show threshold
 75 |     graphs <- replicateM 25 (randomGraph 15 threshold) 
 76 |     print $ map checkSCC graphs
 77 | 
 78 | --------------------------------------------------------------------------------
 79 | 
 80 | checkSCC :: Graph -> Bool
 81 | checkSCC graph = case checkSCC' graph of
 82 |   (b1,b2s,b3s) -> {- b1 && -} (and b2s) && (and b3s)
 83 |   -- the vertex set is not really known
 84 | 
 85 | checkSCC' graph = ( checkSet , checkOrder , checkSC ) where
 86 | 
 87 |   scc = reverse $ tarjanSCC (flipGraph graph)
 88 | 
 89 |   lkpNeighs v = Map.findWithDefault [] v graph
 90 | 
 91 |   -- does the result covers the vertex set?
 92 |   checkSet = sort (concat scc) == Map.keys graph
 93 | 
 94 |   -- is it ordered (topologically sorted)?
 95 |   checkOrder = worker scc where
 96 |     worker []     = []
 97 |     worker [this] = []
 98 |     worker (this:rest) = thisOK : worker rest where
 99 |       thisOK = and [ w `elem` these | v <- this , w <- lkpNeighs v ] where
100 |       these  = this ++ concat rest
101 | 
102 |   -- are the components really SCC-s (can you reach each vertex from each other vertex?)
103 |   checkSC = (map isSCC scc)
104 | 
105 |   isSCC vtxset = 
106 |     and [ w `elem` reachv | v <- vtxset , let reachv = Set.toList (reachableSet graph v) , w <- vtxset ]
107 |   
108 | reachableSet :: Graph -> Vtx -> Set Vtx
109 | reachableSet graph vtx0 = execState (go vtx0) (Set.empty) where
110 |   go :: Vtx -> State (Set Vtx) ()
111 |   go v = do
112 |     visited <- get 
113 |     unless (Set.member v visited) $ do
114 |       put (Set.insert v visited)
115 |       let neighs = Map.findWithDefault [] v graph :: [Vtx] 
116 |       mapM_ go neighs
117 | 
118 | --------------------------------------------------------------------------------
119 | -- * Tarjan's topologically sorted SCC algorithm
120 | 
121 | data Tarjan = Tarjan 
122 |   { _next   :: Int
123 |   , _stack  :: [Vtx] 
124 |   , _links  :: Map Vtx (Int,Int)   -- ^ index and low-linkg
125 |   , _output :: [[Vtx]]
126 |   }
127 |   deriving Show
128 | 
129 | -- | Based on <https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm>
130 | tarjanSCC :: Graph -> [[Vtx]]
131 | tarjanSCC graph = _output (execState (mapM_ worker vtxSet) iniState) where
132 | 
133 |   iniState = Tarjan 0 [] Map.empty []
134 | 
135 |   vtxSet = Map.keys graph
136 | 
137 |   worker v = do
138 |     get >>= \state -> case Map.lookup v (_links state) of
139 |       Nothing -> scc v
140 |       Just _  -> return ()
141 | 
142 |   scc v = do
143 |     -- init
144 |     Tarjan next stack links output <- get
145 |     let stack' = v : stack
146 |         links' = Map.insert v (next,next) links
147 |         next' = next + 1
148 |     put (Tarjan next' stack' links' output)
149 | 
150 |     -- successor vertices
151 |     forM_ (Map.findWithDefault [] v graph) $ \w -> do
152 |       state <- get 
153 |       case Map.lookup w (_links state) of
154 | 
155 |         Nothing -> do
156 |           scc w
157 |           state <- get 
158 |           let links = _links state 
159 |           case Map.lookup v links of { Just (vi,vl) -> 
160 |             case Map.lookup w links of { Just (_ ,wl) -> 
161 |               put $ state { _links = Map.insert v (vi, min vl wl) links } } } 
162 | 
163 |         Just (wi,wl) -> when (w `elem` (_stack state)) $ do
164 |           let links = _links state 
165 |           case Map.lookup v links of { Just (vi,vl) -> 
166 |             case Map.lookup w links of { Just (wi,_ ) -> 
167 |               put $ state { _links = Map.insert v (vi, min vl wi) links } } } 
168 | 
169 |     -- pop and generate
170 |     get >>= \state -> case Map.lookup v (_links state) of 
171 |       Just (vi,vl) -> when (vi == vl) $ do
172 |         Tarjan next stack links output <- get
173 |         let (this , _v:stack') = span (/=v) stack      
174 |         unless (_v == v) $ error "fatal"
175 |         let output' = (v:this) : output
176 |         put (Tarjan next stack' links output')
177 | 
178 | --------------------------------------------------------------------------------
179 | 
180 | 


--------------------------------------------------------------------------------