├── .gitignore ├── LICENSE ├── HOAS.hs ├── DeBruijn.hs ├── README.md ├── Main.hs ├── Convert.hs └── Sharing.hs /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.hi 3 | 4 | *.o 5 | 6 | Main 7 | 8 | hoas-conv 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) [2013] Manuel M T Chakravarty. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | * Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 7 | * Redistributions in binary form must reproduce the above copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | * Neither the names of the contributors nor of their affiliations may 11 | be used to endorse or promote products derived from this software 12 | without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY 15 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /HOAS.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs, StandaloneDeriving, DeriveDataTypeable, NoMonomorphismRestriction #-} 2 | 3 | module HOAS where 4 | 5 | import Data.Typeable 6 | import Text.Show.Functions 7 | 8 | 9 | -- The level of lambda-bound variables. The root has level 0; then it increases with each bound 10 | -- variable — i.e., it is the same as the size of the environment at the defining occurence. 11 | -- 12 | type Level = Int 13 | 14 | -- Lambda terms in higher-order abstract syntax 15 | -- 16 | -- * We don't care about exotic terms here, and hence don't use a parametrised representation. 17 | -- * The `Typeable' contexts and the `Tag' variant are in preparation for being able to convert to a 18 | -- de Bruijn representation. 19 | -- 20 | data Term t where 21 | -- for conversion to de Bruijn 22 | Tag :: Typeable t => Level -> Term t 23 | 24 | Con :: (Typeable t, Show t) => t -> Term t 25 | Lam :: (Typeable s, Typeable t, Show s, Show t) => (Term s -> Term t) -> Term (s -> t) 26 | App :: (Typeable s, Typeable t, Show s, Show t) => Term (s -> t) -> Term s -> Term t 27 | 28 | deriving instance Typeable1 Term 29 | 30 | showTermOp :: Term t -> String 31 | showTermOp (Tag lvl) = "Tag " ++ show lvl 32 | showTermOp (Con v) = "Con " ++ show v 33 | showTermOp (Lam {}) = "Lam" 34 | showTermOp (App {}) = "App" 35 | 36 | -- Term constructors 37 | -- 38 | con = Con 39 | lam = Lam 40 | app = App 41 | 42 | -- A term interpreter for closed terms 43 | -- 44 | intp :: Show t => Term t -> t 45 | intp (Tag ix) = error "HOAS.intp: Tag is only for conversion" 46 | intp (Con v) = v 47 | intp (Lam fun) = intp . fun . Con 48 | intp (App fun arg) = (intp fun) (intp arg) 49 | -------------------------------------------------------------------------------- /DeBruijn.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs #-} 2 | 3 | module DeBruijn where 4 | 5 | import Text.Show.Functions 6 | 7 | import Data.Char 8 | 9 | 10 | -- Type environments are nested pairs (..((), t1), t2, ..., tn) 11 | 12 | -- Index projecting a specific type from a type environment; it's value 13 | -- corresponds to a natural number, the de Brujin index 14 | -- 15 | data Idx env t where 16 | ZeroIdx :: Idx (env, t) t 17 | SuccIdx :: Idx env t -> Idx (env, s) t 18 | 19 | instance Show (Idx env t) where 20 | show = show . idxToInt 21 | 22 | idxToInt :: Idx env t -> Int 23 | idxToInt ZeroIdx = 0 24 | idxToInt (SuccIdx n) = idxToInt n + 1 25 | 26 | -- Lambda terms using de Bruijn indices to represent variables 27 | -- 28 | data Term env t where 29 | Var :: Idx env t -> Term env t 30 | Con :: Show t 31 | => t -> Term env t 32 | Lam :: Term (env, s) t -> Term env (s -> t) 33 | App :: Term env (s -> t) -> Term env s -> Term env t 34 | Let :: Term env s -> Term (env, s) t -> Term env t 35 | 36 | instance Show (Term env t) where 37 | show = showTerm 38 | where 39 | showTerm (Var ix) = "#" ++ show ix 40 | showTerm (Con c) = show c 41 | showTerm (Lam body) = "\\" ++ show body 42 | showTerm (App fun arg) = showParen fun ++ " " ++ showParen arg 43 | showTerm (Let bnd body) = "let " ++ show bnd ++ " in " ++ show body 44 | 45 | showParen t@(Var {}) = show t 46 | showParen t@(Con {}) = show t 47 | showParen t = "(" ++ show t ++ ")" 48 | 49 | pprTerm :: Term env t -> String 50 | pprTerm = ppr (-1) 51 | where 52 | ppr :: Int -> Term env t -> String 53 | ppr lvl (Var ix) = pprIdx lvl ix 54 | ppr lvl (Con c) = show c 55 | ppr lvl (Lam body) = "\\" ++ pprIdx (lvl + 1) ZeroIdx ++ ". " ++ ppr (lvl + 1) body 56 | ppr lvl (App fun arg) = pprParen lvl fun ++ " " ++ pprParen lvl arg 57 | ppr lvl (Let bnd body) = "let " ++ pprIdx (lvl + 1) ZeroIdx ++ " = " ++ ppr lvl bnd ++ 58 | " in " ++ ppr (lvl + 1) body 59 | 60 | pprParen :: Int -> Term env t -> String 61 | pprParen lvl t@(Var {}) = ppr lvl t 62 | pprParen lvl t@(Con {}) = ppr lvl t 63 | pprParen lvl t = "(" ++ ppr lvl t ++ ")" 64 | 65 | pprIdx :: Int -> Idx env t -> String 66 | pprIdx lvl idx 67 | | n < 26 = [chr (ord 'a' + n)] 68 | | otherwise = 'v':show n 69 | where 70 | n = lvl - idxToInt idx 71 | 72 | -- Valuation for a type environment 73 | -- 74 | data Val env where 75 | Empty :: Val () 76 | Push :: Val env -> t -> Val (env, t) 77 | 78 | -- Projection of a value from a valuation using a de Bruijn index 79 | -- 80 | prj :: Idx env t -> Val env -> t 81 | prj ZeroIdx (Push val v) = v 82 | prj (SuccIdx idx) (Push val _) = prj idx val 83 | 84 | -- A term interpreter, evaluating a term under a valuation 85 | -- 86 | intp :: Term env t -> Val env -> t 87 | intp (Var ix) val = prj ix val 88 | intp (Con v) val = v 89 | intp (Lam body) val = intp body . (val `Push`) 90 | intp (App fun arg) val = (intp fun val) (intp arg val) 91 | intp (Let bnd body) val = intp body (val `Push` intp bnd val) 92 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Converting a HOAS term GADT into a de Bruijn term GADT 2 | ====================================================== 3 | 4 | Two approaches to representing variables in a typed term language implemented as a [GADT](http://en.wikipedia.org/wiki/GADT) are (1) to use higher-order abstract syntax (HOAS) or to use de Bruijn indices.. Both approaches are convenient for manipulating terms as they require no explicit alpha conversion and avoid name capture during substitution. Nevertheless, there are trade offs between the two. In particular, the HOAS representation doesn't support comparing variable names and the de Bruijn representation is inconvenient for humans to read and write. For a more detailed discussion, see for example Conor McBride and James McKinna's [I am not a number: I am a free variable](http://www.strictlypositive.org/notanum.ps.gz), where they discuss a mixed representation using de Bruijn indices for bound variables and variables of the meta-language for free variables. 5 | 6 | The tension between the HOAS and de Bruijn representation also has relevance for the design and implementation of embedded domain specific languages (EDSLs) — aka internal languages. When an internal includes higher-order functions, it is usually most convenient for the user to use the function abstraction mechanisms of the host language. However, to execute or compile the internal language, de Bruijn notation is often more convenient; at least, if optimisations are performed. 7 | 8 | An obvious way to relief the tension between the representations is to use HOAS in the surface representation of the internal language and to convert that to the de Bruijn representation before optimisation and execution. However, the conversion is not entirely straight forward for a strongly typed internal language with typed intermediate representations and type-preserving transformations between those representations. The difficulties were already mentioned by Louis-Julien Guillemette and Stefan Monnier in their Haskell'07 paper [A Type-Preserving Closure Conversion in Haskell](http://www.iro.umontreal.ca/~monnier/tcm.pdf) (which is concerned with type-preserving compilation in general and doesn't specifically address internal languages). However, their description of the conversion (in Section 5 of the paper) is sketchy and intermingled with concerns specific to their application. 9 | 10 | The following Haskell code demonstrates the type-preserving conversion from a HOAS to a de Bruijn representation for a simple term language (essentially the lambda calculus with constants of arbitrary type). The two important aspects of the method are two: 11 | 12 | The HOAS term language requires an extra variant (called Tag in the code) that reifies variables during the conversion — otherwise, a HOAS representation doesn't explicitly represent variables. We require an explicit type representation (of the types of bound variables) during the conversion. We use Data.Typeable for that purpose. The implementation comprises five Haskell modules: 13 | 14 | * [`HOAS.hs`](HOAS.hs): Typed terms in higher-order abstract syntax 15 | * [`DeBruijn.hs`](DeBruijn.hs): Typed terms using de Bruijn notation 16 | * [`Sharing.hs`](Sharing.hs): Typed sharing recovery as a pre-processing step before conversion to de Bruijn notation 17 | * [`Convert.hs`](Convert.hs): Type-preserving conversion from HOAS to de Bruijn 18 | * [`Main.hs`](Main.hs): Some example conversions 19 | 20 | (This code has been tested with GHC 7.4.2.) 21 | 22 | __NB__ After completing the HOAS to de Bruijn (without sharing recovery) and publishing it on my website, I learnt that Robert Atkey, Sam Lindley & Jeremy Yallop had independently developed the same method and were about to publish it in a paper entitled [Unembedding Domain-Specific Languages](http://homepages.inf.ed.ac.uk/slindley/papers/unembedding.pdf) in the ACM SIGPLAN Haskell Symposium 2009. That paper discusses other related issues as well and is based on a somewhat different representation of HOAS terms. 23 | -------------------------------------------------------------------------------- /Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ExtendedDefaultRules #-} 2 | {-# LANGUAGE DeriveDataTypeable #-} 3 | 4 | import Data.Typeable 5 | 6 | import qualified DeBruijn 7 | import qualified HOAS 8 | import HOAS (con, lam, app) 9 | import Convert 10 | 11 | i = HOAS.lam $ \x -> x 12 | 13 | zero = HOAS.lam $ \f -> HOAS.lam $ \x -> x 14 | one = HOAS.lam $ \f -> HOAS.lam $ \x -> f `HOAS.app` x 15 | two = HOAS.lam $ \f -> HOAS.lam $ \x -> f `HOAS.app` (f `HOAS.app` x) 16 | three = HOAS.lam $ \f -> HOAS.lam $ \x -> 17 | f `HOAS.app` (f `HOAS.app` (f `HOAS.app` x)) 18 | 19 | plus = HOAS.lam $ \m -> 20 | HOAS.lam $ \n -> 21 | HOAS.lam $ \f -> 22 | HOAS.lam $ \x -> m `HOAS.app` f `HOAS.app` (n `HOAS.app` f `HOAS.app` x) 23 | 24 | plusTwoThree = plus `HOAS.app` two `HOAS.app` three 25 | 26 | plusTwoTwoLet = let a = two in plus `app` a `app` a 27 | 28 | data Nat = Z | S Nat deriving (Show, Typeable) 29 | 30 | pair = HOAS.lam $ \x -> 31 | HOAS.lam $ \y -> 32 | HOAS.lam $ \z -> z `HOAS.app` x `HOAS.app` y 33 | pairfst = HOAS.lam $ \p -> p `HOAS.app` (HOAS.lam $ \x -> HOAS.lam $ \y -> x) 34 | pairsnd = HOAS.lam $ \p -> p `HOAS.app` (HOAS.lam $ \x -> HOAS.lam $ \y -> y) 35 | 36 | pairfstPair = pairfst `HOAS.app` 37 | (pair `HOAS.app` (HOAS.con 'a') `HOAS.app` (HOAS.con 'b')) 38 | 39 | testSharing :: (Eq t, Show t, Typeable t) => HOAS.Term t -> IO () 40 | testSharing t 41 | | sharingResult == nonSharingResult 42 | = putStrLn $ DeBruijn.pprTerm (convertSharing t) ++ ": OK" 43 | | otherwise 44 | = do 45 | { putStrLn $ DeBruijn.pprTerm (convert t) ++ ": OK" 46 | ; putStrLn $ " Without sharing: " ++ show nonSharingResult 47 | ; putStrLn $ " With sharing : " ++ show sharingResult 48 | } 49 | where 50 | nonSharingResult = DeBruijn.intp (convert t) DeBruijn.Empty 51 | sharingResult = DeBruijn.intp (convertSharing t) DeBruijn.Empty 52 | 53 | main 54 | = do 55 | printLine "Identity :" (convert i) 56 | printLine "zero :" (convert zero) 57 | printLine "one :" (convert one) 58 | printLine "two :" (convert two) 59 | printLine "three :" (convert three) 60 | printLine "plus :" (convert plus) 61 | printLine "plus two three :\n " (convert plusTwoThree) 62 | printLine "let x = two in plus x x:\n " (convert plusTwoTwoLet) 63 | printLine "let x = two in plus x x:\n " (convertSharing plusTwoTwoLet) 64 | putStrLn " (with sharing)" 65 | printLine' "EVAL plus two three:" 66 | ((DeBruijn.intp (convert plusTwoThree)) DeBruijn.Empty S Z) 67 | printLine "pairfst (pair 'a' 'b'):\n " (convert pairfstPair) 68 | printLine' "EVAL pairfst (pair 'a' 'b'):" 69 | (DeBruijn.intp (convert pairfstPair) DeBruijn.Empty) 70 | 71 | -- [too polymorphic: let y = \a -> (1::Int) in let x = let z = (\c d -> c) y in z z in x ((\b -> x) y)] 72 | -- let y = \a -> 1 in let x = let z = (\c -> c) y in (\d -> z) z in (\e -> x 2) ((\b -> x) y) 73 | testSharing (let y = lam $ \a -> con (1::Int) 74 | in let x = let z = (lam $ \c -> c) `app` y in (lam $ \d -> z) `app` z 75 | in 76 | (lam $ \e -> x `app` con (2::Int)) `app` ((lam $ \b -> x) `app` y)) 77 | testSharing (let inc = con (+) `app` con 1 78 | in let nine = let three = inc `app` con 2 in (con (*)) `app` three `app` three 79 | in 80 | con (-) `app` (inc `app` nine) `app` nine) 81 | testSharing (let plus = lam $ \x -> lam $ \y -> let z = con (*) `app` x `app` y 82 | in con (+) `app` z `app` z 83 | in let inc = plus `app` con 1 84 | in 85 | plus `app` (inc `app` (inc `app` con 1)) `app` con 42) 86 | where 87 | printLine desc e = putStrLn $ desc ++ " " ++ show e ++ " — " ++ DeBruijn.pprTerm e 88 | printLine' desc e = putStrLn $ desc ++ " " ++ show e 89 | -------------------------------------------------------------------------------- /Convert.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs, ScopedTypeVariables #-} 2 | 3 | module Convert (convert, convertSharing) where 4 | 5 | -- standard libraries 6 | import Data.List 7 | import Data.Typeable 8 | import Data.Maybe 9 | 10 | -- friends 11 | import qualified DeBruijn 12 | import qualified HOAS 13 | import Sharing 14 | 15 | 16 | -- A layout of an environment an entry for each entry of the environment. 17 | -- Each entry in the layout holds the deBruijn index that refers to the 18 | -- corresponding entry in the environment. 19 | -- 20 | data Layout env env' where 21 | EmptyLayout :: Layout env () 22 | PushLayout :: Typeable t 23 | => Layout env env' -> DeBruijn.Idx env t -> Layout env (env', t) 24 | 25 | -- Yield the number of entries in an environment layout 26 | -- 27 | size :: Layout env env' -> Int 28 | size EmptyLayout = 0 29 | size (PushLayout lyt _) = size lyt + 1 30 | 31 | -- Add an entry to a layout, incrementing all indices 32 | -- 33 | inc :: Layout env env' -> Layout (env, t) env' 34 | inc EmptyLayout = EmptyLayout 35 | inc (PushLayout lyt ix) = PushLayout (inc lyt) (DeBruijn.SuccIdx ix) 36 | 37 | -- Project the nth index out of an environment layout. 38 | -- 39 | -- All errors are internal errors. 40 | -- 41 | prjIdx :: forall t env env'. Typeable t => String -> Int -> Layout env env' -> DeBruijn.Idx env t 42 | prjIdx ctxt 0 (PushLayout _ (idx :: DeBruijn.Idx env0 t0)) 43 | = case gcast idx of 44 | Just idx' -> idx' 45 | Nothing -> 46 | error $ "Convert.prjIdx: type mismatch at " ++ ctxt ++ "\n " ++ 47 | "Couldn't match expected type `" ++ show (typeOf (undefined::t)) ++ 48 | "' with actual type `" ++ show (typeOf (undefined::t0)) ++ "'" 49 | prjIdx ctxt n (PushLayout l _) = prjIdx ctxt (n - 1) l 50 | prjIdx ctxt _ EmptyLayout = error $ "Convert.prjIdx: environment doesn't contain index at " ++ ctxt 51 | 52 | -- |Convert a closed HOAS term to a closed de Bruijn term. 53 | -- 54 | convert :: HOAS.Term t -> DeBruijn.Term () t 55 | convert = cvt EmptyLayout 56 | where 57 | cvt :: Layout env env -> HOAS.Term t -> DeBruijn.Term env t 58 | cvt lyt (HOAS.Tag sz) = DeBruijn.Var (prjIdx "'Tag' in vanilla convert" (size lyt - sz - 1) lyt) 59 | cvt lyt (HOAS.Con v) = DeBruijn.Con v 60 | cvt lyt (HOAS.Lam f) = DeBruijn.Lam (cvt lyt' (f tag)) 61 | where 62 | tag = HOAS.Tag (size lyt) 63 | lyt' = inc lyt `PushLayout` DeBruijn.ZeroIdx 64 | cvt lyt (HOAS.App fun arg) = DeBruijn.App (cvt lyt fun) (cvt lyt arg) 65 | 66 | -- |Convert a closed HOAS term to a closed de Bruijn term while recovering the sharing of the 67 | -- source expression. 68 | -- 69 | convertSharing :: Typeable t => HOAS.Term t -> DeBruijn.Term () t 70 | convertSharing = cvt EmptyLayout [] . recoverSharing 71 | where 72 | cvt :: Layout env env -> [StableSharingTerm] -> SharingTermFloated t -> DeBruijn.Term env t 73 | cvt lyt env (VarSharing st) 74 | | Just i <- findIndex (matchStableTerm st) env 75 | = DeBruijn.Var (prjIdx (ctxt ++ "; i = " ++ show i) i lyt) 76 | | null env 77 | = error $ "Cyclic definition of a term (st = " ++ show (hashStableTermHeight st) ++ ")" 78 | | otherwise 79 | = error $ "convertSharing: " ++ err 80 | where 81 | ctxt = "shared term with stable name " ++ show (hashStableTermHeight st) 82 | err = "inconsistent valuation @ " ++ ctxt ++ ";\n env = " ++ show env 83 | cvt lyt env (LetSharing st@(StableSharingTerm _ boundTerm) bodyTerm) 84 | = let lyt' = inc lyt `PushLayout` DeBruijn.ZeroIdx 85 | in 86 | DeBruijn.Let (cvt lyt env boundTerm) (cvt lyt' (st:env) bodyTerm) 87 | cvt lyt env (TermSharing _ (Tag lvl)) 88 | = DeBruijn.Var (prjIdx ("de Bruijn conversion tag " ++ show lvl) lvl lyt) 89 | cvt lyt env (TermSharing _ (Con v)) 90 | = DeBruijn.Con v 91 | cvt lyt env (TermSharing _ (Lam sts f)) 92 | = DeBruijn.Lam (cvt lyt' env' f) 93 | where 94 | lyt' = inc lyt `PushLayout` DeBruijn.ZeroIdx 95 | env' = sts:env 96 | cvt lyt env (TermSharing _ (App fun arg)) 97 | = DeBruijn.App (cvt lyt env fun) (cvt lyt env arg) 98 | -------------------------------------------------------------------------------- /Sharing.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs, RankNTypes, ScopedTypeVariables #-} 2 | 3 | module Sharing where 4 | 5 | import Control.Applicative 6 | import Control.Monad.Fix 7 | import Data.HashTable as Hash 8 | import qualified Data.IntMap as IntMap 9 | import Debug.Trace 10 | import System.IO.Unsafe (unsafePerformIO) 11 | import Data.List 12 | import Data.Maybe 13 | import Data.Typeable 14 | import System.Mem.StableName 15 | 16 | import HOAS (Level) 17 | import qualified HOAS as HOAS 18 | 19 | 20 | -- Occurences maps 21 | -- --------------- 22 | 23 | -- Opaque stable name for term nodes. It is being used to key the occurence map; hence, we need to 24 | -- hide the type parameter. 25 | -- 26 | data StableTermName where 27 | StableTermName :: Typeable t => StableName (HOAS.Term t) -> StableTermName 28 | 29 | instance Show StableTermName where 30 | show (StableTermName sn) = show $ hashStableName sn 31 | 32 | instance Eq StableTermName where 33 | StableTermName sn1 == StableTermName sn2 34 | | Just sn1' <- gcast sn1 = sn1' == sn2 35 | | otherwise = False 36 | 37 | makeStableTerm :: HOAS.Term t -> IO (StableName (HOAS.Term t)) 38 | makeStableTerm e = e `seq` makeStableName e 39 | 40 | -- Mutable occurence map 41 | 42 | -- Mutable hashtable version of the occurrence map keyed on the stable names of terms. It associates 43 | -- each term node with an occurence count and the height of the AST. 44 | -- 45 | type OccMapHash = Hash.HashTable StableTermName (Int, Int) 46 | 47 | -- Create a new hash table keyed on AST nodes. 48 | -- 49 | newOccMapHashTable :: IO OccMapHash 50 | newOccMapHashTable = Hash.new (==) hashStableTerm 51 | where 52 | hashStableTerm (StableTermName sn) = fromIntegral (hashStableName sn) 53 | 54 | -- Enter one term node occurrence into an occurrence map. Returns 'Just h' if this is a repeated 55 | -- occurence and the height of the repeatedly occuring term is 'h'. 56 | -- 57 | -- If this is the first occurence, the 'height' *argument* must provide the height of the term; 58 | -- otherwise, the height will be *extracted* from the occurence map. In the latter case, this 59 | -- function yields the term height. 60 | -- 61 | enterOcc :: OccMapHash -> StableTermName -> Int -> IO (Maybe Int) 62 | enterOcc occMap sa height 63 | = do 64 | entry <- Hash.lookup occMap sa 65 | case entry of 66 | Nothing -> Hash.insert occMap sa (1 , height) >> return Nothing 67 | Just (n, heightS) -> Hash.update occMap sa (n + 1, heightS) >> return (Just heightS) 68 | 69 | -- Immutable occurence map 70 | 71 | -- Immutable version of the occurence map (storing the occurence count only, not the height). We 72 | -- use the 'StableName' hash to index an 'IntMap' and disambiguate 'StableName's with identical 73 | -- hashes explicitly, storing them in a list in the 'IntMap'. 74 | -- 75 | type OccMap = IntMap.IntMap [(StableTermName, Int)] 76 | 77 | -- Turn a mutable into an immutable occurence map. 78 | -- 79 | freezeOccMap :: OccMapHash -> IO OccMap 80 | freezeOccMap oc 81 | = do 82 | kvs <- map dropHeight <$> Hash.toList oc 83 | return . IntMap.fromList . map (\kvs -> (key (head kvs), kvs)). groupBy sameKey $ kvs 84 | where 85 | key (StableTermName sn, _) = hashStableName sn 86 | sameKey kv1 kv2 = key kv1 == key kv2 87 | dropHeight (k, (cnt, _)) = (k, cnt) 88 | 89 | -- Look up the occurence map keyed by array computations using a stable name. If a the key does 90 | -- not exist in the map, return an occurence count of '1'. 91 | -- 92 | lookupWithTermName :: OccMap -> StableTermName -> Int 93 | lookupWithTermName oc sa@(StableTermName sn) 94 | = fromMaybe 1 $ IntMap.lookup (hashStableName sn) oc >>= Prelude.lookup sa 95 | 96 | -- Look up the occurence map keyed by array computations using a sharing array computation. If an 97 | -- the key does not exist in the map, return an occurence count of '1'. 98 | -- 99 | lookupWithSharingTerm :: OccMap -> StableSharingTerm -> Int 100 | lookupWithSharingTerm oc (StableSharingTerm (StableTermHeight sn _) _) 101 | = lookupWithTermName oc (StableTermName sn) 102 | 103 | 104 | -- Term structure for sharing recovery 105 | -- ----------------------------------- 106 | 107 | -- Terms for sharing recovery consist of two mutually recursive datatypes. The one hear hold the 108 | -- actual lambda term forms. 'SharingTerm' keeps track of stable names (for identification) as well 109 | -- as where variables and let bindings need to be introduced. 110 | -- 111 | data Term binder t where 112 | -- for conversion to de Bruijn 113 | Tag :: Typeable t 114 | => Level -> Term binder t 115 | 116 | Con :: (Typeable t, Show t) 117 | => t -> Term binder t 118 | Lam :: (Typeable s, Typeable t, Show s, Show t) 119 | => binder -> SharingTerm binder t -> Term binder (s -> t) 120 | App :: (Typeable s, Typeable t, Show s, Show t) 121 | => SharingTerm binder (s -> t) -> SharingTerm binder s -> Term binder t 122 | 123 | showTermOp :: Term binder t -> String 124 | showTermOp (Tag lvl) = "Tag " ++ show lvl 125 | showTermOp (Con v) = "Con " ++ show v 126 | showTermOp (Lam {}) = "Lam" 127 | showTermOp (App {}) = "App" 128 | 129 | 130 | -- Stable term nodes 131 | -- ----------------- 132 | 133 | -- Stable name for a term node including the height of the term. 134 | -- 135 | data StableTermHeight t = StableTermHeight (StableName (HOAS.Term t)) Int 136 | 137 | instance Eq (StableTermHeight t) where 138 | (StableTermHeight sn1 _) == (StableTermHeight sn2 _) = sn1 == sn2 139 | 140 | higherSTH :: StableTermHeight t1 -> StableTermHeight t2 -> Bool 141 | StableTermHeight _ h1 `higherSTH` StableTermHeight _ h2 = h1 > h2 142 | 143 | hashStableTermHeight :: StableTermHeight t -> Int 144 | hashStableTermHeight (StableTermHeight sn _) = hashStableName sn 145 | 146 | 147 | -- Sharing information in terms 148 | -- ---------------------------- 149 | 150 | -- Interleave sharing annotations into a term. Subterms can be marked as being represented by 151 | -- variable (binding a shared subtree) using 'VarSharing' and as being prefixed by a let binding 152 | -- (for a shared subtree) using 'LetSharing'. 153 | -- 154 | data SharingTerm binder t where 155 | VarSharing :: Typeable t 156 | => StableTermHeight t -> SharingTerm binder t 157 | LetSharing :: StableSharingTerm -> SharingTerm binder t -> SharingTerm binder t 158 | TermSharing :: Typeable t 159 | => StableTermHeight t -> Term binder t -> SharingTerm binder t 160 | 161 | -- Stable name for a term associated with its sharing-annotated version. 162 | -- 163 | data StableSharingTerm where 164 | StableSharingTerm :: Typeable t => StableTermHeight t -> SharingTermFloated t -> StableSharingTerm 165 | 166 | instance Show StableSharingTerm where 167 | show (StableSharingTerm sn _) = show $ hashStableTermHeight sn 168 | 169 | instance Eq StableSharingTerm where 170 | StableSharingTerm sn1 _ == StableSharingTerm sn2 _ 171 | | Just sn1' <- gcast sn1 = sn1' == sn2 172 | | otherwise = False 173 | 174 | higherSST :: StableSharingTerm -> StableSharingTerm -> Bool 175 | StableSharingTerm sn1 _ `higherSST` StableSharingTerm sn2 _ = sn1 `higherSTH` sn2 176 | 177 | -- Test whether the given stable names matches an array computation with sharing. 178 | -- 179 | matchStableTerm :: Typeable t => StableTermHeight t -> StableSharingTerm -> Bool 180 | matchStableTerm sn1 (StableSharingTerm sn2 _) 181 | | Just sn1' <- gcast sn1 = sn1' == sn2 182 | | otherwise = False 183 | 184 | -- Dummy entry for environments to be used for unused variables. 185 | -- 186 | noStableTermName :: StableTermHeight t 187 | noStableTermName = unsafePerformIO $ StableTermHeight <$> makeStableName undefined <*> pure 0 188 | 189 | 190 | -- Sharing recovery 191 | -- ---------------- 192 | 193 | -- Sharing terms after computing the occurence map and pruning repeated subtrees. 194 | -- 195 | -- Lambdas are annotated with the level of their binder. 196 | -- 197 | type SharingTermPruned t = SharingTerm Level t 198 | 199 | -- Sharing terms after sharing recovery and shared subtrees have been floated to their let-binding 200 | -- positions. 201 | -- 202 | -- Lambdas are annotated by the 'StableSharingTerm' identifying their binder. 203 | -- 204 | type SharingTermFloated t = SharingTerm StableSharingTerm t 205 | 206 | -- |Recover sharing information and annotate the HOAS AST with variable and let binding annotations. 207 | -- 208 | -- NB: Strictly speaking, this function is not deterministic, as it uses stable pointers to 209 | -- determine the sharing of subterms. The stable pointer API does not guarantee its 210 | -- completeness; i.e., it may miss some equalities, which implies that we may fail to discover 211 | -- some sharing. However, sharing does not affect the denotational meaning of a term; hence, 212 | -- we do not compromise denotational correctness. 213 | -- 214 | -- There is one caveat: We currently rely on 'Tag' leaves representing free variables to be 215 | -- shared if any of them is used more than once. If one is duplicated, the environment for 216 | -- de Bruijn conversion will have a duplicate entry, and hence, be of the wrong size, which 217 | -- is fatal. (The 'lookupStableTag' function will already bail out.) 218 | -- 219 | recoverSharing :: Typeable t => HOAS.Term t -> SharingTermFloated t 220 | {-# NOINLINE recoverSharing #-} 221 | recoverSharing term 222 | = let (term', occMap) = 223 | unsafePerformIO $ do -- to enable stable pointers; it's safe as explained above 224 | { (term', occMap) <- makeOccMap 0 term 225 | ; frozenOccMap <- freezeOccMap occMap 226 | ; return (term', frozenOccMap) 227 | } 228 | in 229 | determineScopes occMap term' 230 | 231 | -- Compute the term occurence map, mark all nodes with stable names, and drop repeated occurences 232 | -- of shared subterms (Phase One). 233 | -- 234 | -- Note [Traversing functions and side effects] 235 | -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 236 | -- We need to descent into function bodies to build the 'OccMap' with all occurences in the 237 | -- function bodies. Due to the side effects in the construction of the occurence map and, more 238 | -- importantly, the dependence of the second phase on /global/ occurence information, we may not 239 | -- delay the body traversals by putting them under a lambda. Hence, we apply each function, to 240 | -- traverse its body and use a /dummy abstraction/ of the result. 241 | -- 242 | -- For example, given a function 'f', we traverse 'f (Tag 0)', which yields a transformed body 'e'. 243 | -- As the result of the traversal of the overall function, we use 'const e'. Hence, it is crucial 244 | -- that the 'Tag' supplied during the initial traversal is already the one required by the HOAS to 245 | -- de Bruijn conversion in 'Convert.convertSharing' — any subsequent application of 'const e' will 246 | -- only yield 'e' with the embedded 'Tag 0' of the original application. During sharing recovery, 247 | -- we float /all/ free variables ('Tag' terms) out to construct the initial environment for producing 248 | -- de Bruijn indices, which replaces them by 'VarSharing' nodes. Hence, the tag values only serve 249 | -- the purpose of determining the ordering in that initial environment. They are /not/ directly used 250 | -- to compute the de Brujin indices. 251 | -- 252 | makeOccMap :: Typeable t => Level -> HOAS.Term t -> IO (SharingTermPruned t, OccMapHash) 253 | makeOccMap lvl rootTerm 254 | = do 255 | { occMap <- newOccMapHashTable 256 | ; (rootTerm', _) <- traverse lvl occMap rootTerm 257 | ; return (rootTerm', occMap) 258 | } 259 | where 260 | traverse :: forall t. Typeable t 261 | => Level -> OccMapHash -> HOAS.Term t -> IO (SharingTermPruned t, Int) 262 | traverse lvl occMap term 263 | = mfix $ \ ~(_, height) -> do 264 | { -- Compute stable name and enter it into the occurence map 265 | ; sn <- makeStableTerm term 266 | ; heightIfRepeatedOccurence <- enterOcc occMap (StableTermName sn) height 267 | 268 | ; traceLine (HOAS.showTermOp term) $ 269 | case heightIfRepeatedOccurence of 270 | Just height -> "REPEATED occurence (sn = " ++ show (hashStableName sn) ++ 271 | "; height = " ++ show height ++ ")" 272 | Nothing -> "first occurence (sn = " ++ show (hashStableName sn) ++ ")" 273 | 274 | -- Reconstruct the term in shared form. 275 | -- 276 | -- In case of a repeated occurence, the height comes from the occurence map; otherwise, 277 | -- it is computed by the traversal function passed in 'newTerm'. See also 'enterOcc'. 278 | -- 279 | -- NB: This function can only be used in the case alternatives below; outside of the 280 | -- case we cannot discharge the 'Typeable t' constraint. 281 | ; let reconstruct :: Typeable t 282 | => IO (Term Level t, Int) 283 | -> IO (SharingTerm Level t, Int) 284 | reconstruct newTerm 285 | = case heightIfRepeatedOccurence of 286 | Just height 287 | -> return (VarSharing (StableTermHeight sn height), height) 288 | _ -> do 289 | { (term, height) <- newTerm 290 | ; return (TermSharing (StableTermHeight sn height) term, height) 291 | } 292 | 293 | ; case term of 294 | HOAS.Tag i -> reconstruct $ return (Tag i, 0) -- height is 0! 295 | HOAS.Con v -> reconstruct $ return (Con v, 1) 296 | HOAS.Lam f -> reconstruct $ do 297 | { -- see Note [Traversing functions and side effects] 298 | ; (body, h) <- traverse (lvl + 1) occMap (f (HOAS.Tag lvl)) 299 | ; return (Lam lvl body, h + 1) 300 | } 301 | HOAS.App f a -> reconstruct $ do 302 | { (f', h1) <- traverse lvl occMap f 303 | ; (a', h2) <- traverse lvl occMap a 304 | ; return (App f' a', h1 `max` h2 + 1) 305 | } 306 | } 307 | 308 | -- Type used to maintain how often each shared subterm, so far, occured during a bottom-up sweep. 309 | -- 310 | -- Invariants: 311 | -- - If one shared term 's' is itself a subterm of another shared term 't', then 's' must occur 312 | -- *after* 't' in the 'NodeCounts'. 313 | -- - No shared term occurs twice. 314 | -- - A term may have a final occurence count of only 1 iff it is either a free variable ('Tag') 315 | -- or an array computation listed out of an expression. 316 | -- 317 | -- We determine the subterm property by using the tree height in 'StableTermHeight'. Trees get 318 | -- smaller towards the end of a 'NodeCounts' list. The height of free variables ('Tag') is 0, 319 | -- whereas other leaves have height 1. This guarantees that all free variables are at the end 320 | -- of the 'NodeCounts' list. 321 | -- 322 | -- To ensure the invariant is preserved over merging node counts from sibling subterms, the 323 | -- function '(+++)' must be used. 324 | -- 325 | type NodeCounts = [NodeCount] 326 | 327 | data NodeCount = NodeCount StableSharingTerm Int 328 | deriving Show 329 | 330 | -- Empty node counts 331 | -- 332 | noNodeCounts :: NodeCounts 333 | noNodeCounts = [] 334 | 335 | -- Singleton node counts for 'Acc' 336 | -- 337 | nodeCount :: StableSharingTerm -> Int -> NodeCounts 338 | nodeCount sst n = [NodeCount sst n] 339 | 340 | -- Combine node counts that belong to the same node. 341 | -- 342 | -- * We assume that the node counts invariant —subterms follow their parents— holds for both 343 | -- arguments and guarantee that it still holds for the result. 344 | -- 345 | (+++) :: NodeCounts -> NodeCounts -> NodeCounts 346 | us +++ vs = foldr insert us vs 347 | where 348 | insert x [] = [x] 349 | insert x@(NodeCount st1 count1) ys@(y@(NodeCount st2 count2) : ys') 350 | | st1 == st2 = NodeCount (st1 `pickNoneVar` st2) (count1 + count2) : ys' 351 | | st1 `higherSST` st2 = x : ys 352 | | otherwise = y : insert x ys' 353 | 354 | (StableSharingTerm _ (VarSharing _)) `pickNoneVar` st2 = st2 355 | st1 `pickNoneVar` _st2 = st1 356 | 357 | -- Determine whether a 'NodeCount' is for a 'Tag', which represent free variables. 358 | -- 359 | isFreeVar :: NodeCount -> Bool 360 | isFreeVar (NodeCount (StableSharingTerm _ (TermSharing _ (Tag _))) _) = True 361 | isFreeVar _ = False 362 | 363 | -- Determine the scopes of all variables representing shared subterms (Phase Two) in a bottom-up 364 | -- sweep. 365 | -- 366 | -- Precondition: there are only 'VarSharing' and 'TermSharing' nodes in the argument. 367 | -- 368 | determineScopes :: Typeable t 369 | => OccMap -> SharingTermPruned t -> SharingTermFloated t 370 | determineScopes occMap rootTerm 371 | = let 372 | (sharingTerm, counts) = scopes rootTerm 373 | in 374 | if null counts 375 | then sharingTerm 376 | else error $ "determineScopes: unbound shared subtrees: " ++ show counts 377 | where 378 | scopes :: forall t. SharingTermPruned t -> (SharingTermFloated t, NodeCounts) 379 | scopes (LetSharing _ _) 380 | = error $ "determineScopes: scopesAcc: unexpected 'LetSharing'" 381 | scopes (VarSharing sn) 382 | = (VarSharing sn, StableSharingTerm sn (VarSharing sn) `nodeCount` 1) 383 | scopes (TermSharing sn pterm) 384 | = case pterm of 385 | Tag i -> reconstruct (Tag i) noNodeCounts 386 | Con v -> reconstruct (Con v) noNodeCounts 387 | Lam lvl f -> let 388 | (f', count) = scopes f 389 | (stableTag, count') = lookupStableTag lvl count 390 | in 391 | reconstruct (Lam stableTag f') count' 392 | App f a -> let 393 | (f', count1) = scopes f 394 | (a', count2) = scopes a 395 | in 396 | reconstruct (App f' a') (count1 +++ count2) 397 | where 398 | -- Occurence count of the currently processed node 399 | occCount = let StableTermHeight sn' _ = sn 400 | in 401 | lookupWithTermName occMap (StableTermName sn') 402 | 403 | -- Reconstruct the current tree node. 404 | -- 405 | -- * If the current node is being shared ('occCount > 1'), replace it by a 'VarSharing' 406 | -- node and float the shared subtree out wrapped in a 'NodeCounts' value. 407 | -- * If the current node is not shared, reconstruct it in place. 408 | -- * Special case for free variables ('Tag'): Replace the tree by a sharing variable and 409 | -- float the 'Tag' out in a 'NodeCounts' value. This is idependent of the number of 410 | -- occurences. 411 | -- 412 | -- In either case, any completed 'NodeCounts' are injected as bindings using 'LetSharing' 413 | -- node. 414 | -- 415 | reconstruct :: Typeable t 416 | => Term StableSharingTerm t -> NodeCounts -> (SharingTermFloated t, NodeCounts) 417 | reconstruct newTerm@(Tag _) _subCount 418 | -- free variable => replace by a sharing variable regardless of the number of occ.s 419 | = let thisCount = StableSharingTerm sn (TermSharing sn newTerm) `nodeCount` 1 420 | in 421 | tracePure "FREE" (show thisCount) $ 422 | (VarSharing sn, thisCount) 423 | reconstruct newTerm subCount 424 | -- shared subtree => replace by a sharing variable 425 | | occCount > 1 426 | = let allCount = (StableSharingTerm sn sharingTerm `nodeCount` 1) +++ newCount 427 | in 428 | tracePure ("SHARED" ++ completed) (show allCount) $ 429 | (VarSharing sn, allCount) 430 | -- neither shared nor free variable => leave it as it is 431 | | otherwise 432 | = tracePure ("Normal" ++ completed) (show newCount) $ 433 | (sharingTerm, newCount) 434 | where 435 | -- Determine the bindings that needs to be attached to the current node... 436 | (newCount, bindHere) = filterCompleted subCount 437 | 438 | -- ...and wrap them in 'LetSharing' constructors 439 | lets = foldl (flip (.)) id . map LetSharing $ bindHere 440 | sharingTerm = lets $ TermSharing sn newTerm 441 | 442 | -- trace support 443 | completed | null bindHere = "" 444 | | otherwise = "(" ++ show (length bindHere) ++ " lets)" 445 | 446 | -- Extract *leading* nodes that have a complete node count (i.e., their node count is equal 447 | -- to the number of occurences of that node in the overall expression). 448 | -- 449 | -- Nodes with a completed node count should be let bound at the currently processed node. 450 | -- 451 | -- NB: Only extract leading nodes (i.e., the longest run at the *front* of the list that is 452 | -- complete). Otherwise, we would let-bind subterms before their parents, which leads 453 | -- to scope errors. 454 | -- 455 | filterCompleted :: NodeCounts -> (NodeCounts, [StableSharingTerm]) 456 | filterCompleted counts 457 | = let (completed, counts') = break notComplete counts 458 | in (counts', [sa | NodeCount sa _ <- completed]) 459 | where 460 | -- a node is not yet complete while the node count 'n' is below the overall number 461 | -- of occurences for that node in the whole program, with the exception that free 462 | -- variables are never complete 463 | notComplete nc@(NodeCount st n) | not . isFreeVar $ nc = lookupWithSharingTerm occMap st > n 464 | notComplete _ = True 465 | 466 | -- Find the stable tag representing the binder at the given level (which equals the tag value). 467 | -- 468 | -- If there is not such tag, the binder has no usage occurence. 469 | -- 470 | -- if the tag occurs multiple times, sharing of the binder tag was not preserved and we cannot 471 | -- continue (c.f., comments at 'determineScopes'). 472 | -- 473 | lookupStableTag :: Level -> NodeCounts -> (StableSharingTerm, NodeCounts) 474 | lookupStableTag lvl counts 475 | = case partition hasLevel counts of 476 | ([], _) -> (noStableSharing, counts) -- tag not used in expression 477 | ([NodeCount st _], counts') -> (st, counts') -- tag has a unique occurence 478 | (counts', _) -> 479 | error $ "lookupStableTag: duplicate 'Tag's\n " ++ 480 | intercalate ", " [showST st | NodeCount st _ <- counts'] 481 | where 482 | hasLevel (NodeCount (StableSharingTerm _ (TermSharing _ (Tag lvl'))) _) = lvl == lvl' 483 | hasLevel (NodeCount st _) = False 484 | 485 | noStableSharing :: StableSharingTerm 486 | noStableSharing = StableSharingTerm noStableTermName (undefined :: SharingTermFloated ()) 487 | 488 | showST (StableSharingTerm _ (TermSharing sn term)) = show (hashStableTermHeight sn) ++ ": " ++ 489 | showTermOp term 490 | showST (StableSharingTerm _ (VarSharing sn)) = "VarSharing " ++ show (hashStableTermHeight sn) 491 | showST (StableSharingTerm _ (LetSharing st _ )) = "LetSharing " ++ show st ++ "..." 492 | 493 | 494 | -- Debugging 495 | -- --------- 496 | 497 | traceOn :: Bool 498 | traceOn = False 499 | 500 | tracePure :: String -> String -> a -> a 501 | tracePure header msg val | traceOn = trace (header ++ ": " ++ msg) val 502 | | otherwise = val 503 | 504 | traceLine :: String -> String -> IO () 505 | traceLine header msg = tracePure header msg $ return () 506 | --------------------------------------------------------------------------------