├── ChangeLog.md ├── LICENSE ├── Main.hs ├── README.md ├── Setup.hs ├── cabal.project ├── data ├── gen-data.hs ├── test.csv └── test1.csv ├── sql-haskell.cabal └── src ├── Compiler.hs ├── Interpreter.hs ├── LMS.hs ├── SimpleInterpreter.hs ├── SimpleStagedInterpreter.hs ├── StreamInterpreter.hs ├── StreamInterpreter2.hs ├── StreamLMS.hs └── Test.hs /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Revision history for sql-haskell 2 | 3 | ## 0.1.0.0 -- YYYY-mm-dd 4 | 5 | * First version. Released on an unsuspecting world. 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Matthew Pickering 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Matthew Pickering nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import qualified Test 4 | 5 | main :: IO () 6 | main = Test.main 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repo contains a Haskell implementation of 2 | 3 | > Rompf, Tiark, and Nada Amin. "Functional pearl: a SQL to C compiler in 500 lines of code." Acm Sigplan Notices 50.9 (2015): 2-9. 4 | 5 | The paper describes a simple SQL query compiler which uses generative programming 6 | techniques. The idea is that we first define a definitional interpreter, which 7 | we then stage in order to produce a compiler for a specific known SQL query. 8 | Further to this, the authors use the LMS framework in scala which makes staging 9 | a program as easy as adding some type annotations. The `LMS` module explicitly 10 | implements this idea without any magic that LMS performs to make the transformation 11 | appear transparent. 12 | 13 | There are 3 modules of interest: 14 | 15 | 1 `Interpreter.hs` - A basic naive interpreter 16 | 2. `Compiler.hs` - A staged interpreter 17 | 3. `LMS.hs` - A definition which can either be specialised to a compiler or an 18 | interpreter by providing a type annotation. 19 | 20 | 21 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | profiling: true 2 | packages: . 3 | -------------------------------------------------------------------------------- /data/gen-data.hs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cabal 2 | {- cabal: 3 | build-depends: base, random-strings, streaming-bytestring, bytestring, streaming, resourcet 4 | -} 5 | module Main where 6 | 7 | import Control.Monad 8 | import Data.List 9 | import Test.RandomStrings 10 | import qualified Data.ByteString.Streaming.Char8 as Q 11 | import qualified Streaming.Prelude as S 12 | import Streaming 13 | import qualified Streaming.Internal as I 14 | import Control.Monad.Trans.Resource 15 | 16 | c_limit = 2 ^ c_limit_k 17 | 18 | c_limit_k = 4 19 | 20 | file_line :: Int -> Q.ByteString ResIO () 21 | file_line l = 22 | Q.mwrap (liftIO $ Q.string . intercalate "," <$> randomStringsLen (randomString (onlyAlpha randomASCII)) (1, 5) l) 23 | 24 | {- 25 | main :: IO () 26 | main = 27 | 28 | forM_ [0..c_limit_k] $ \c -> 29 | let cols = 10 ^ c 30 | headers = ["f" ++ show n | n <- [0..cols] ] 31 | header = intercalate "," headers 32 | in forM_ [0..5] $ \i -> do 33 | let n = 10 ^ i 34 | fname = "data-" ++ show n ++ "-" ++ show cols ++ ".csv" 35 | file_lines <- replicateM n (file_line cols) 36 | let f = header : file_lines 37 | writeFile fname (unlines f) 38 | -} 39 | 40 | main :: IO () 41 | main = runResourceT $ 42 | 43 | forM_ [0..c_limit_k] $ \c -> 44 | let cols = 2 ^ c 45 | header :: Q.ByteString ResIO () 46 | header = Q.pack $ 47 | intercalates (S.yield ',') $ 48 | S.subst (\n -> S.yield 'f' >> S.each (show n)) (S.take cols $ S.enumFrom 0) 49 | in forM_ [0..5] $ \i -> do 50 | let n = 10 ^ i 51 | fname = "data-" ++ show n ++ "-" ++ show cols ++ ".csv" 52 | let file_lines = I.replicates n (file_line cols) 53 | let f = do I.yields header >> file_lines 54 | liftIO $ print fname 55 | Q.writeFile fname (Q.unlines f) 56 | 57 | -------------------------------------------------------------------------------- /data/test1.csv: -------------------------------------------------------------------------------- 1 | name,weight 2 | john,100 3 | alex,80 4 | tim,60 5 | -------------------------------------------------------------------------------- /sql-haskell.cabal: -------------------------------------------------------------------------------- 1 | -- Initial sql-haskell.cabal generated by cabal init. For further 2 | -- documentation, see http://haskell.org/cabal/users-guide/ 3 | 4 | name: sql-haskell 5 | version: 0.1.0.0 6 | -- synopsis: 7 | -- description: 8 | license: BSD3 9 | license-file: LICENSE 10 | author: Matthew Pickering 11 | maintainer: matthewtpickering@gmail.com 12 | -- copyright: 13 | -- category: 14 | build-type: Simple 15 | extra-source-files: ChangeLog.md 16 | cabal-version: >=1.10 17 | 18 | library 19 | exposed-modules: Test Compiler Interpreter LMS SimpleInterpreter StreamLMS StreamInterpreter 20 | -- SimpleStagedInterpreter 21 | -- other-modules: 22 | -- other-extensions: 23 | build-depends: base >=4.10, scanner, bytestring, template-haskell 24 | , th-lift-instances, file-embed, dump-core, streaming-bytestring, 25 | resourcet, weigh, streaming, criterion, silently 26 | hs-source-dirs: src 27 | ghc-options: -Wall -O2 -ddump-str-signatures 28 | default-language: Haskell2010 29 | 30 | executable test 31 | main-is: Main.hs 32 | other-modules: 33 | build-depends: sql-haskell, base >= 4.10 34 | ghc-options: -O0 -rtsopts -threaded 35 | -------------------------------------------------------------------------------- /src/Compiler.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE TemplateHaskell #-} 3 | {-# LANGUAGE NoImplicitPrelude #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | {-# LANGUAGE DeriveLift #-} 6 | module Compiler where 7 | 8 | import qualified Data.ByteString as B 9 | import qualified Data.ByteString.Char8 as BC 10 | import Data.ByteString (ByteString) 11 | import Control.Monad 12 | import Data.List 13 | import Data.Maybe 14 | import Language.Haskell.TH 15 | import Language.Haskell.TH.Syntax 16 | import Prelude hiding (Applicative(..), getLine, getChar) 17 | import qualified Prelude as P 18 | import Instances.TH.Lift () 19 | 20 | eq :: Eq a => Code a -> Code a -> Code Bool 21 | eq (Code e1) (Code e2) = Code [|| $$e1 == $$e2 ||] 22 | 23 | neq :: Eq a => Code a -> Code a -> Code Bool 24 | neq (Code e1) (Code e2) = Code [|| $$e1 /= $$e2 ||] 25 | 26 | newtype Code a = Code (Q (TExp a)) 27 | 28 | runCode :: Code a -> QTExp a 29 | runCode (Code a) = a 30 | 31 | pure :: Lift a => a -> Code a 32 | pure = Code . unsafeTExpCoerce . lift 33 | 34 | infixl 4 <*> 35 | (<*>) :: Code (a -> b) -> Code a -> Code b 36 | (Code f) <*> (Code a) = Code [|| $$f $$a ||] 37 | 38 | type Fields = [QTExp ByteString] 39 | type Schema = [ByteString] 40 | type Res = IO () 41 | type Table = FilePath 42 | 43 | data Record = Record { fields :: Fields, schema :: Schema } 44 | 45 | data ResRecord = ResRecord { fields_r :: [ByteString], schema_r :: Schema } 46 | 47 | getField :: ByteString -> Record -> QTExp ByteString 48 | getField field (Record fs sch) = 49 | let i = fromJust (elemIndex field sch) 50 | in (fs !! i) 51 | 52 | getFields :: [ByteString] -> Record -> [QTExp ByteString] 53 | getFields fs r = map (flip getField r) fs 54 | 55 | data Operator = Scan FilePath Schema | Project Schema Schema Operator 56 | | Filter Predicate Operator | Join Operator Operator deriving Show 57 | 58 | query :: Operator 59 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" ["name", "age"])) 60 | 61 | query2 :: Operator 62 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" ["name", "age"])) 63 | 64 | queryJoin :: Operator 65 | queryJoin = Join (Scan "data/test.csv" ["name", "age"]) (Scan "data/test1.csv" ["name", "weight"]) 66 | 67 | queryProj :: Operator 68 | queryProj = Project ["age"] ["age"] (Scan "data/test.csv" ["name", "age"]) 69 | 70 | data Predicate = Eq Ref Ref | Ne Ref Ref deriving Show 71 | 72 | data Ref = Field ByteString | Value ByteString deriving Show 73 | 74 | type QTExp a = Q (TExp a) 75 | 76 | fix :: (a -> a) -> a 77 | fix f = let x = f x in x 78 | 79 | getLineS :: Scanner -> ByteString 80 | getLineS = getLine . runScanner 81 | 82 | getLine = getChar '\n' 83 | {-# NOINLINE getLine #-} 84 | 85 | getChar c s = BC.takeWhile (/= c) s 86 | {-# NOINLINE getChar #-} 87 | 88 | skipLine = skipChar '\n' 89 | skipLineS = skipCharS '\n' 90 | skipChar c s = BC.tail (BC.dropWhile (/= c) s) 91 | skipCharS c s = Scanner (skipChar c (runScanner s)) 92 | {-# NOINLINE skipLine #-} 93 | {-# NOINLINE skipChar #-} 94 | {-# NOINLINE skipCharS #-} 95 | {-# NOINLINE skipLineS #-} 96 | 97 | data Scanner = Scanner ByteString 98 | 99 | runScanner (Scanner b) = b 100 | 101 | newScanner :: FilePath -> IO Scanner 102 | newScanner fp = Scanner <$> B.readFile fp 103 | 104 | nextLine :: Code Scanner -> (Code ByteString, Code Scanner) 105 | nextLine s = 106 | let fs = Code [|| getLineS $$(runCode s) ||] 107 | rs = Code [|| skipLineS $$(runCode s) ||] 108 | in (fs, rs) 109 | 110 | hasNext :: Scanner -> Bool 111 | hasNext (Scanner bs) = bs /= "" 112 | 113 | while :: 114 | Monoid m => 115 | QTExp (t -> Bool) -> QTExp ((t -> IO m) -> t -> IO m) -> QTExp (t -> IO m) 116 | while k b = [|| fix (\r rs -> whenM ($$k rs) ($$b r rs)) ||] 117 | 118 | whenM :: Monoid m => Bool -> m -> m 119 | whenM b act = if b then act else mempty 120 | 121 | processCSV :: forall m .Monoid m => Schema -> FilePath -> (Record -> QTExp (IO m)) -> QTExp (IO m) 122 | processCSV ss f yld = 123 | [|| do 124 | bs <- newScanner f 125 | $$(rows ss) bs ||] 126 | where 127 | rows :: Schema -> QTExp (Scanner -> IO m) 128 | rows sch = do 129 | while [|| hasNext ||] 130 | [|| \r rs -> do 131 | $$(let (hs, ts) = nextLine (Code [|| rs ||]) 132 | in [|| $$(yld (Record (parseRow sch (runCode hs)) sch)) 133 | >> r $$(runCode ts) ||] ) ||] 134 | 135 | -- We can't use the standard |BC.split| function here because 136 | -- we we statically know how far we will unroll. The result is then 137 | -- more static as we can do things like drop certain fields if we 138 | -- perform a projection. 139 | parseRow :: Schema -> QTExp ByteString -> [QTExp ByteString] 140 | parseRow [] _ = [] 141 | parseRow [_] b = 142 | [[|| skipLine $$b ||]] 143 | parseRow (_:ss') b = 144 | let new = [|| skipChar ',' $$b ||] 145 | rs = parseRow ss' new 146 | in ([|| skipChar ',' $$b ||] : rs) 147 | 148 | 149 | 150 | printFields :: Fields -> QTExp Res 151 | printFields [] = [|| return () ||] 152 | printFields [x] = [|| BC.putStrLn $$x ||] 153 | printFields (x:xs) = 154 | [|| B.putStr $$x >> BC.putStr "," >> $$(printFields xs) ||] 155 | 156 | evalPred :: Predicate -> Record -> Code Bool 157 | evalPred predicate rec = 158 | case predicate of 159 | Eq a b -> eq (evalRef a rec) (evalRef b rec) 160 | Ne a b -> neq (evalRef a rec) (evalRef b rec) 161 | 162 | evalRef :: Ref -> Record -> Code ByteString 163 | evalRef (Value a) _ = Code [|| a ||] 164 | evalRef (Field name) r = Code (getField name r) 165 | 166 | 167 | restrict :: Record -> Schema -> Schema -> Record 168 | restrict r newSchema parentSchema = 169 | Record (map (flip getField r) parentSchema) newSchema 170 | 171 | -- We can unroll (==) if we statically know the length of each argument. 172 | _eq :: Eq a => [QTExp a] -> [QTExp a] -> QTExp Bool 173 | _eq [] [] = [|| True ||] 174 | _eq (v:vs) (v1:v1s) = [|| if $$v == $$v1 then $$(_eq vs v1s) else False ||] 175 | _eq _ _ = [|| False ||] 176 | 177 | execOp :: Monoid m => Operator -> (Record -> QTExp (IO m)) -> QTExp (IO m) 178 | execOp op yld = 179 | case op of 180 | Scan file sch -> 181 | processCSV sch file yld 182 | Filter predicate parent -> execOp parent 183 | (\rec -> [|| whenM $$(runCode $ evalPred predicate rec) $$(yld rec) ||] ) 184 | Project newSchema parentSchema parent -> 185 | execOp parent (\rec -> yld (restrict rec newSchema parentSchema )) 186 | Join left right -> 187 | execOp left (\rec -> execOp right (\rec' -> 188 | let keys = schema rec `intersect` schema rec' 189 | in [|| whenM $$(_eq (getFields keys rec) (getFields keys rec')) 190 | ($$(yld (Record (fields rec ++ fields rec') 191 | (schema rec ++ schema rec')))) ||] )) 192 | 193 | runQuery :: Operator -> QTExp Res 194 | runQuery q = execOp q (printFields . fields) 195 | 196 | inj :: ResRecord -> IO [ResRecord] 197 | inj = return . return 198 | 199 | -- We still need to eliminate the binding time abstraction 200 | runQueryL :: Operator -> QTExp (IO [ResRecord]) 201 | runQueryL o = execOp o (\r -> [|| inj $$(spill r) ||]) 202 | 203 | spill :: Record -> QTExp ResRecord 204 | spill (Record rs ss) = [|| ResRecord $$(spill2 rs) ss ||] 205 | 206 | spill2 :: [QTExp ByteString] -> QTExp [ByteString] 207 | spill2 [] = [|| [] ||] 208 | spill2 (x:xs) = [|| $$x : $$(spill2 xs) ||] 209 | 210 | test :: IO () 211 | test = do 212 | -- processCSV "data/test.csv" (print . getField "name") 213 | expr <- runQ $ unTypeQ $ execOp query (printFields . fields) 214 | -- expr <- runQ $ unTypeQ $ power 215 | putStrLn $ pprint expr 216 | 217 | 218 | -------------------------------------------------------------------------------- /src/Interpreter.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Interpreter where 3 | 4 | import qualified Data.ByteString as B 5 | import qualified Data.ByteString.Char8 as BC 6 | import Data.ByteString (ByteString) 7 | import Control.Monad 8 | import Data.List 9 | import Data.Maybe 10 | 11 | type Fields = [ByteString] 12 | type Schema = [ByteString] 13 | type Table = FilePath 14 | 15 | data Record = Record { fields :: Fields, schema :: Schema } deriving Show 16 | 17 | data Operator = Scan Table | Project Schema Schema Operator 18 | | Filter Predicate Operator | Join Operator Operator 19 | 20 | query :: Operator 21 | query = Project ["name"] ["name"] (Scan "data/test.csv") 22 | 23 | queryJoin :: Operator 24 | queryJoin = Join (Scan "data/test.csv") (Scan "data/test1.csv") 25 | 26 | 27 | data Predicate = Eq Ref Ref | Ne Ref Ref 28 | 29 | data Ref = Field ByteString | Value ByteString 30 | 31 | newtype Scanner = Scanner ByteString 32 | 33 | newScanner :: FilePath -> IO Scanner 34 | newScanner fp = Scanner <$> B.readFile fp 35 | 36 | nextLine :: Scanner -> (ByteString, Scanner) 37 | nextLine (Scanner bs) = 38 | let (fs, rs) = BC.span (/= '\n') bs 39 | in (fs, Scanner (BC.tail rs)) 40 | 41 | hasNext :: Scanner -> Bool 42 | hasNext (Scanner bs) = bs /= "" 43 | 44 | processCSV :: Monoid m => FilePath -> (Record -> IO m) -> IO m 45 | processCSV fp yld = do 46 | s <- newScanner fp 47 | scanner s 48 | 49 | where 50 | scanner s = do 51 | let (fs, rs) = nextLine s 52 | rows (BC.split ',' fs) rs 53 | 54 | rows fs s = 55 | whenM (hasNext s) 56 | (let (r, rest) = nextLine s 57 | in yld (Record (BC.split ',' r) fs) 58 | <> rows fs rest) 59 | 60 | whenM :: Monoid m => Bool -> m -> m 61 | whenM b act = if b then act else mempty 62 | 63 | 64 | printFields :: Fields -> IO () 65 | printFields [] = putStr "\n" 66 | printFields [b] = BC.putStr b >> putStr "\n" 67 | printFields (b:bs) = BC.putStr b >> BC.putStr "," >> printFields bs 68 | 69 | evalPred :: Predicate -> Record -> Bool 70 | evalPred predicate rec = 71 | case predicate of 72 | Eq a b -> evalRef a rec == evalRef b rec 73 | Ne a b -> evalRef a rec /= evalRef b rec 74 | 75 | evalRef :: Ref -> Record -> ByteString 76 | evalRef (Value a) _ = a 77 | evalRef (Field name) r = getField name r 78 | 79 | getField :: ByteString -> Record -> ByteString 80 | getField field (Record fs sch) = 81 | let i = fromJust (elemIndex field sch) 82 | in fs !! i 83 | 84 | getFields :: [ByteString] -> Record -> [ByteString] 85 | getFields fs r = map (flip getField r) fs 86 | 87 | restrict :: Record -> [ByteString] -> [ByteString] -> Record 88 | restrict r newSchema parentSchema = 89 | Record (getFields parentSchema r) newSchema 90 | 91 | execOp :: Monoid m => Operator -> (Record -> IO m) -> IO m 92 | execOp op yld = 93 | case op of 94 | Scan filename -> processCSV filename yld 95 | Filter predicate parent -> execOp parent (\rec -> whenM (evalPred predicate rec) (yld rec)) 96 | Project newSchema parentSchema parent -> execOp parent (\rec -> yld (restrict rec newSchema parentSchema )) 97 | Join left right -> 98 | execOp left (\rec -> execOp right (\rec' -> 99 | let keys = schema rec `intersect` schema rec' 100 | in whenM (getFields keys rec == getFields keys rec') 101 | (yld (Record (fields rec ++ fields rec') 102 | (schema rec ++ schema rec'))))) 103 | 104 | 105 | runQuery :: Operator -> IO () 106 | runQuery o = execOp o (printFields . fields) 107 | 108 | runQueryL :: Operator -> IO [Record] 109 | runQueryL o = execOp o (return . return) 110 | 111 | main :: IO () 112 | main = do 113 | processCSV "data/test.csv" (print . getField "name") 114 | runQuery query 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /src/LMS.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE TemplateHaskell #-} 3 | {-# LANGUAGE NoImplicitPrelude #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | {-# LANGUAGE KindSignatures #-} 6 | {-# LANGUAGE DataKinds #-} 7 | {-# LANGUAGE MultiParamTypeClasses #-} 8 | {-# LANGUAGE TypeApplications #-} 9 | {-# LANGUAGE AllowAmbiguousTypes #-} 10 | {-# LANGUAGE InstanceSigs #-} 11 | {-# LANGUAGE FlexibleContexts #-} 12 | {-# LANGUAGE FlexibleInstances #-} 13 | {-# LANGUAGE GADTs #-} 14 | {-# LANGUAGE TypeFamilies #-} 15 | {-# LANGUAGE ConstraintKinds #-} 16 | {-# LANGUAGE DeriveLift #-} 17 | module LMS where 18 | 19 | import qualified Data.ByteString as B 20 | import qualified Data.ByteString.Char8 as BC 21 | import Data.ByteString (ByteString) 22 | import Control.Monad 23 | import Data.List 24 | import Data.Maybe 25 | import Language.Haskell.TH 26 | import Language.Haskell.TH.Syntax 27 | import Prelude hiding (Applicative(..)) 28 | import Instances.TH.Lift () 29 | --import Data.FileEmbed 30 | import Data.Functor.Identity 31 | import Control.Applicative (liftA2) 32 | import System.IO.Unsafe 33 | import qualified Prelude as P 34 | import GHC.TypeLits 35 | import Control.Applicative (liftA3) 36 | 37 | import Instances.TH.Lift 38 | 39 | import Criterion 40 | import Criterion.Main 41 | import System.IO.Silently 42 | import System.Mem 43 | import Debug.Trace 44 | import System.IO 45 | 46 | for = flip map 47 | 48 | 49 | class Ops r where 50 | eq :: Eq a => r a -> r a -> r Bool 51 | neq :: Eq a => r a -> r a -> r Bool 52 | bc_split :: r Char -> r ByteString -> r [ByteString] 53 | tail_dropwhile :: Char -> r ByteString -> r ByteString 54 | take_while :: Char -> r ByteString -> r ByteString 55 | 56 | _if :: r Bool -> r a -> r a -> r a 57 | _caseString :: r ByteString -> r a -> r a -> r a 58 | _fix :: (r a -> r a) -> r a 59 | _lam :: (r a -> r b) -> r (a -> b) 60 | 61 | _bind :: Monad m => r (m a) -> (r (a -> m b)) -> r (m b) 62 | _print :: Show a => r a -> r Res 63 | _putStr :: r ByteString -> r Res 64 | _hPutStr :: r Handle -> r ByteString -> r Res 65 | (>>>) :: Monoid m => r m -> r m -> r m 66 | _empty :: Monoid m => r m 67 | _pure :: P.Applicative f => r a -> r (f a) 68 | 69 | -- Scanner interface 70 | _newScanner :: FilePath -> r (IO Scanner) 71 | _hasNext :: r Scanner -> r Bool 72 | _nextLine :: r Scanner -> (r ByteString, r Scanner) 73 | 74 | pure :: Lift a => a -> r a 75 | (<*>) :: r (a -> b) -> r a -> r b 76 | 77 | _case_record :: r (Record r1) 78 | -> r ((Fields r1) -> Schema -> c) 79 | -> r c 80 | 81 | _lup :: r ByteString -> r (Fields r1) -> r Schema -> r (r1 ByteString) 82 | 83 | _intersect :: Eq a => r [a] -> r [a] -> r [a] 84 | _mkRecord :: r (Fields r1) -> r Schema -> r (Record r1) 85 | _cons :: r a -> r [a] -> r [a] 86 | 87 | 88 | 89 | infixl 4 <*> 90 | 91 | newtype Code a = Code (Q (TExp a)) 92 | 93 | instance Ops Code where 94 | eq (Code e1) (Code e2) = Code [|| $$e1 == $$e2 ||] 95 | neq (Code e1) (Code e2) = Code [|| $$e1 /= $$e2 ||] 96 | bc_split (Code e1) (Code e2) = Code [|| BC.split $$e1 $$e2 ||] 97 | _if (Code a) (Code b) (Code c) = Code [|| if $$a then $$b else $$c ||] 98 | _caseString (Code a) (Code b) (Code c) = 99 | Code [|| case $$a of 100 | "" -> $$b 101 | _ -> $$c ||] 102 | (>>>) (Code a) (Code b) = Code [|| $$a <> $$b ||] 103 | _bind (Code a) (Code b) = Code [|| $$a >>= $$b ||] 104 | _empty = Code [|| mempty ||] 105 | _pure (Code p) = Code [|| P.pure $$p ||] 106 | 107 | _print (Code a) = Code [|| print $$a ||] 108 | _putStr (Code a) = Code [|| BC.putStr $$a ||] 109 | _hPutStr (Code a) (Code b) = Code [|| BC.hPutStr $$a $$b ||] 110 | 111 | tail_dropwhile c (Code b) = Code [|| BC.tail (BC.dropWhile (/= c) $$b) ||] 112 | take_while c (Code b) = Code [|| BC.takeWhile (/= c) $$b ||] 113 | _fix f = Code [|| fix (\a -> $$(runCode $ f (Code [||a||]))) ||] 114 | 115 | _lam f = Code $ [|| \a -> $$(runCode $ f (Code [|| a ||])) ||] 116 | 117 | 118 | _newScanner fp = Code [|| newScanner fp ||] 119 | _hasNext s = Code [|| hasNext $$(runCode s) ||] 120 | _nextLine = _nextLineCode 121 | 122 | 123 | 124 | pure = Code . unsafeTExpCoerce . lift 125 | (Code f) <*> (Code a) = Code [|| $$f $$a ||] 126 | 127 | _case_record (Code r) (Code k1) = 128 | Code [|| case $$r of 129 | Record rs ss -> $$(k1) rs ss ||] 130 | 131 | _lup (Code l1) (Code l2) (Code l3) = Code [|| lup $$l1 $$l2 $$l3 ||] 132 | 133 | _intersect (Code l1) (Code l2) = Code [|| $$l1 `intersect` $$l2 ||] 134 | 135 | _mkRecord (Code l1) (Code l2) = Code [|| Record $$l1 $$l2 ||] 136 | 137 | _cons (Code l1) (Code l2) = Code [|| $$l1 : $$l2 ||] 138 | 139 | 140 | instance Ops Identity where 141 | eq = liftA2 (==) 142 | neq = liftA2 (/=) 143 | bc_split = liftA2 (BC.split) 144 | tail_dropwhile c = fmap (BC.tail . (BC.dropWhile (/= c))) 145 | take_while c = fmap (BC.takeWhile (/= c)) 146 | _print = fmap print 147 | _putStr = fmap (BC.putStr) 148 | _hPutStr = liftA2 BC.hPut 149 | _empty = Identity mempty 150 | _if (Identity b) (Identity c1) (Identity c2) = Identity (if b then c1 else c2) 151 | 152 | _fix = fix 153 | _lam f = Identity (\a -> runIdentity (f (Identity a))) 154 | (>>>) = liftA2 mappend 155 | _bind = liftA2 (>>=) 156 | _pure = fmap (P.pure) 157 | _newScanner fp = Identity (newScanner fp) 158 | _hasNext = fmap hasNext 159 | _nextLine = nextLine 160 | 161 | pure = Identity 162 | (<*>) (Identity a1) (Identity a2) = Identity (a1 a2) 163 | 164 | _case_record (Identity r) (Identity k) = 165 | case r of 166 | Record rs ss -> Identity (k rs ss) 167 | 168 | _lup = liftA3 lup 169 | 170 | _intersect = liftA2 intersect 171 | _mkRecord = liftA2 Record 172 | 173 | _cons = liftA2 (:) 174 | 175 | 176 | 177 | _when :: (Monoid m, Ops r) => r Bool -> r m -> r m 178 | _when cond act = _if cond act _empty 179 | 180 | _whenA :: (P.Applicative f, Ops r) => r Bool -> r (f ()) -> r (f ()) 181 | _whenA cond act = _if cond act (_pure (pure ())) 182 | 183 | runCode :: Code a -> Q (TExp a) 184 | runCode (Code a) = a 185 | 186 | 187 | 188 | type Fields r = [r ByteString] 189 | type Schema = [ByteString] 190 | type Table = FilePath 191 | 192 | type Res = IO () 193 | 194 | data Record r = Record { fields :: Fields r, schema :: Schema } 195 | 196 | getField :: Ops r1 => r1 ByteString -> r1 (Record r) -> r1 (r ByteString) 197 | getField field r = 198 | _case_record r (_lam $ \fs -> _lam $ \sch -> _lup field fs sch) 199 | 200 | 201 | lup :: ByteString -> Fields r -> Schema -> r ByteString 202 | lup field fs sch = 203 | let i = fromJust (elemIndex field sch) 204 | in (fs !! i) 205 | 206 | --getFields :: [ByteString] -> Record r -> [r ByteString] 207 | --getFields fs r = map (flip getField r) fs 208 | 209 | 210 | data Operator = Scan FilePath Schema | Project Schema Schema Operator 211 | | Filter Predicate Operator | Join Operator Operator deriving (Lift, Show) 212 | 213 | cols n = take (2 ^ n) [BC.pack('f':show n) | n <- [0..]] 214 | 215 | fp c n = "data/data-" ++ show (10^n) ++ "-" ++ show (2 ^ c) ++ ".csv" 216 | 217 | 218 | queryP, queryJoin, queryLast, query, query2 :: Int -> Int -> Operator 219 | query c n = 220 | Project ["f0"] ["f0"] (Filter (Eq (Value "a") (Field "f0")) 221 | (Scan (fp c n) (cols c))) 222 | 223 | 224 | query2 c n = Project ["f0"] ["f0"] (Filter (Eq (Value "a") (Field "f0")) 225 | (Scan (fp c n) (cols c))) 226 | 227 | queryJoin c n = Join (Scan (fp c n) (cols c)) (Scan (fp c n) (cols c)) 228 | 229 | queryP c n = Project ["f0"] ["f0"] (Scan (fp c n) (cols c)) 230 | 231 | queryLast c n = 232 | let cs = cols c 233 | in Project [last cs] [last cs] (Scan (fp c n) cs) 234 | 235 | 236 | data Predicate = Eq Ref Ref | Ne Ref Ref deriving (Show, Lift) 237 | 238 | data Ref = Field ByteString | Value ByteString deriving (Show, Lift) 239 | 240 | type QTExp a = Code a 241 | 242 | fix :: (a -> a) -> a 243 | fix f = let x = f x in x 244 | 245 | data Scanner = Scanner ByteString 246 | 247 | newScanner :: FilePath -> IO Scanner 248 | newScanner fp = Scanner <$> B.readFile fp 249 | 250 | nextLine :: Identity Scanner -> (Identity ByteString, Identity Scanner) 251 | nextLine (Identity (Scanner bs)) = 252 | let (fs, rs) = BC.span (/= '\n') bs 253 | in (Identity fs, Identity (Scanner (BC.tail rs))) 254 | 255 | -- As |span| is not stage aware, it is more dynamic an necessary. Splitting 256 | -- the implementation up means that we can skip over entire rows if 257 | -- necessary in the generated code. 258 | _nextLineCode :: Code Scanner -> (Code ByteString, Code Scanner) 259 | _nextLineCode scanner = 260 | let fs = Code [|| let (Scanner s) = $$(runCode scanner) in BC.takeWhile (/= '\n') s ||] 261 | ts = Code [|| let (Scanner s) = $$(runCode scanner) in Scanner (BC.tail (BC.dropWhile (/= '\n') s)) ||] 262 | in (fs, ts) 263 | 264 | 265 | 266 | hasNext :: Scanner -> Bool 267 | hasNext (Scanner bs) = bs /= "" 268 | 269 | while :: 270 | (Ops r, Monoid m) => 271 | r (t -> Bool) -> r ((t -> IO m) -> t -> IO m) -> r (t -> IO m) 272 | while k b = _fix (\r -> _lam $ \rs -> _when (k <*> rs) (b <*> r <*> rs)) 273 | 274 | whenM :: Monoid m => Bool -> m -> m 275 | whenM b act = if b then act else mempty 276 | 277 | type family ResT r1 r2 :: * -> * where 278 | ResT Identity Code = Code 279 | ResT Code Identity = Code 280 | ResT Identity r = r 281 | 282 | processCSV :: forall m r r1 . (Monoid m, O r1 r) 283 | => Schema -> FilePath -> (r1 (Record r) -> (ResT r1 r) (IO m)) -> (ResT r1 r) (IO m) 284 | processCSV ss f yld = 285 | _newScanner f `_bind` rows ss 286 | where 287 | rows :: Schema -> (ResT r1 r) (Scanner -> (IO m)) 288 | rows sch = do 289 | while (_lam _hasNext) 290 | (_lam $ \r -> _lam $ \rs -> 291 | (let (hs, ts) = _nextLine rs 292 | rec = _mkRecord (parseRow sch hs) (pure sch) 293 | in yld rec >>> (r <*> ts)) 294 | ) 295 | 296 | -- We can't use the standard |BC.split| function here because 297 | -- we we statically know how far we will unroll. The result is then 298 | -- more static as we can do things like drop certain fields if we 299 | -- perform a projection. 300 | parseRow :: Schema -> (ResT r1 r) ByteString -> r1 [r ByteString] 301 | parseRow [] _ = _empty 302 | parseRow [_] b = 303 | weaken (take_while '\n' b) `_cons` _empty 304 | parseRow (_:ss') b = 305 | let new = tail_dropwhile ',' b 306 | rs = parseRow ss' new 307 | in weaken (take_while ',' b) `_cons` rs 308 | 309 | class Weaken r1 r where 310 | weaken :: (ResT r1 r) a -> r1 (r a) 311 | 312 | instance Weaken Code Identity where 313 | weaken (Code c) = Code [|| Identity $$c ||] 314 | 315 | instance Weaken Identity i where 316 | weaken i = Identity i 317 | 318 | 319 | 320 | printFields :: Ops r => r Handle -> Fields r -> r Res 321 | printFields _ [] = _empty 322 | printFields h [x] = _hPutStr h x >>> _hPutStr h (pure "\n") 323 | printFields h (x:xs) = 324 | _hPutStr h x >>> _hPutStr h (pure ",") >>> printFields h xs 325 | 326 | class Collapse r1 r2 where 327 | c :: r1 (r2 a) -> (ResT r1 r2) a 328 | 329 | instance Collapse Identity r where 330 | c (Identity a) = a 331 | 332 | instance Collapse Code Identity where 333 | c (Code c) = Code [|| runIdentity $$c ||] 334 | 335 | type O r1 r = (Collapse r1 r 336 | , Ops r 337 | , Ops (ResT r1 r) 338 | , Ops r1 339 | , Weaken r1 r) 340 | 341 | evalPred :: forall r1 r . (O r1 r) 342 | => Predicate -> r1 (Record r) -> (ResT r1 r) Bool 343 | evalPred predicate rec = 344 | case predicate of 345 | Eq a b -> eq (evalRef a rec) (evalRef b rec) 346 | Ne a b -> neq (evalRef a rec) (evalRef b rec) 347 | 348 | evalRef :: (Collapse r1 r 349 | , Ops r 350 | , Ops (ResT r1 r) 351 | , Ops r1 ) => Ref -> r1 (Record r) -> (ResT r1 r) ByteString 352 | evalRef (Value a) _ = pure a 353 | evalRef (Field name) r = c $ getField (pure name) r 354 | 355 | 356 | class O r1 r => ListEq (s :: Symbol) r r1 where 357 | list_eq :: (Eq a) => r1 [r a] -> r1 [r a] -> (ResT r1 r) Bool 358 | 359 | class O r1 r => Restrict (s :: Symbol) r r1 where 360 | restrict :: Ops r => r1 (Record r) -> Schema -> Schema -> r1 (Record r) 361 | 362 | 363 | -- Point of this instance is that we can use the normal `map` in order to 364 | -- get the value of each field at 365 | instance Restrict "unrolled" Code Identity where 366 | restrict r newSchema parentSchema 367 | | length parentSchema <= 10 = 368 | let ns = map Identity parentSchema 369 | nfs = sequence $ map (flip getField r) ns 370 | in Record <$> nfs <*> Identity newSchema 371 | restrict r newSchema parentSchema = 372 | restrict @"recursive" r newSchema parentSchema 373 | -- Record <$> (map (flip getField r) parentSchema) <*> _ 374 | 375 | instance Restrict s Identity Identity where 376 | restrict rec newSchema parentSchema = 377 | let ns = map Identity parentSchema 378 | r = map (runIdentity . flip getField rec) ns 379 | in Identity $ Record r parentSchema 380 | 381 | -- In this instance we lost the static information about 382 | instance Restrict "recursive" Identity Code where 383 | restrict :: Code (Record Identity) -> Schema -> Schema -> Code (Record Identity) 384 | restrict rec news ps = 385 | let ns = map pure ps 386 | r = map (flip getField rec) ns 387 | in Code [|| Record $$(runCode $ spillL r) ps ||] 388 | 389 | -- In this instance we just do the same as the unrolled version because 390 | -- that's already recursive so err, that's good I guess. 391 | -- 392 | -- Can't really think how to make this totally suck now because the problem 393 | -- is that we have to maintain the staticness of the schema so we can't 394 | -- really weaken. 395 | instance Restrict "recursive" Code Identity where 396 | restrict :: Identity (Record Code) -> Schema -> Schema -> Identity (Record Code) 397 | restrict (Identity rec) new ps = 398 | let fs = fields rec 399 | fs' = map (getField2 fs ps) (map pure ps) 400 | getField' = getField2 401 | in Identity (Record fs' new) 402 | 403 | getField2 :: Fields Code -> Schema -> Code ByteString -> Code ByteString 404 | getField2 fields ps lup_field = 405 | Code [|| runIdentity $ lup $$(runCode lup_field) $$(runCode $ spill2 fields) ps ||] 406 | 407 | 408 | -- One of my favourite functions <3 409 | spillL :: [Code a] -> Code [a] 410 | spillL [] = Code [|| [] ||] 411 | spillL (x:xs) = Code [|| $$(runCode x) : $$(runCode $ spillL xs) ||] 412 | 413 | spillQ :: [Q (TExp a)] -> Q (TExp [a]) 414 | spillQ [] = [|| [] ||] 415 | spillQ (x:xs) = [|| $$x : $$(spillQ xs) ||] 416 | 417 | instance ListEq "unrolled" Code Identity where 418 | list_eq :: (Eq a) => Identity [Code a] -> Identity [Code a] -> Code Bool 419 | list_eq (Identity xs) (Identity ys) = 420 | let lxs = length xs 421 | lys = length ys 422 | in if max lxs lys <= 10 423 | then list_eq_r xs ys 424 | else list_eq @"recursive" (Identity xs) (Identity ys) 425 | 426 | 427 | list_eq_r [] [] = pure True 428 | list_eq_r (v:vs) (v1:v1s) 429 | = _if (eq v v1) (list_eq_r vs v1s) (pure False) 430 | list_eq_r _ _ = pure False 431 | 432 | instance ListEq s Identity Identity where 433 | list_eq :: (Eq a) => Identity [Identity a] -> Identity [Identity a] -> Identity Bool 434 | list_eq xs ys = Identity (xs == ys) 435 | 436 | instance ListEq "recursive" Identity Code where 437 | list_eq :: Eq a => Code [Identity a] -> Code [Identity a] -> Code Bool 438 | list_eq (Code xs) (Code ys) = Code [|| $$(xs) == $$(ys) ||] 439 | 440 | 441 | pull :: [Code a] -> Code [a] 442 | pull [] = Code [|| [] ||] 443 | pull (x:xs) = Code [|| $$(runCode x) : $$(runCode $ pull xs) ||] 444 | 445 | instance ListEq "recursive" Code Identity where 446 | list_eq :: (Eq a) => Identity [Code a] -> Identity [Code a] -> Code Bool 447 | list_eq (Identity xs) (Identity ys) = Code [|| $$(runCode $ pull xs) == $$(runCode $ pull ys) ||] 448 | 449 | 450 | -- if r = Code then r1 = Identity 451 | -- 452 | -- if r = Identity then r1 = Code or Identity 453 | -- Having two type parameters means that we can either choose to use a 454 | -- continuation which statically knows the argument (r1 = Identity) or not. 455 | execOp :: forall restrict le r r1 m . ( 456 | Restrict restrict r r1 457 | , ListEq le r r1 458 | , Monoid m 459 | , Ops r 460 | , Ops r1) 461 | => Operator 462 | -> (r1 (Record r) -> (ResT r1 r) (IO m)) 463 | -> (ResT r1 r) (IO m) 464 | execOp op yld = 465 | case op of 466 | Scan file sch -> 467 | processCSV sch file yld 468 | Filter predicate parent -> execOp @restrict @le parent 469 | (\rec -> _if (evalPred @r1 @r predicate rec) (yld rec) _empty ) 470 | Project newSchema parentSchema parent -> 471 | execOp @restrict @le parent (\rec -> yld (restrict @restrict rec newSchema parentSchema )) 472 | Join left right -> 473 | execOp @restrict @le left (\rec -> execOp @restrict @le right (\rec' -> 474 | let k1 = _schema rec 475 | k2 = _fields rec 476 | keys = _intersect (_schema rec) (_schema rec') 477 | leq = list_eq @le @r @r1 @ByteString k2 (_fields rec') 478 | in _when leq 479 | (yld (_mkRecord (_fields rec >>> _fields rec') 480 | (_schema rec >>> _schema rec') 481 | )))) 482 | 483 | _fields r = _case_record r (_lam $ \fs -> _lam $ \_ -> fs) 484 | _schema r = _case_record r (_lam $ \_ -> _lam $ \ss -> ss) 485 | 486 | execOpU :: forall r r1 m . (Monoid m, O r r1 487 | , Restrict "unrolled" r r1 488 | , ListEq "unrolled" r r1) 489 | => Operator 490 | -> (r1 (Record r) -> (ResT r1 r) (IO m)) 491 | -> (ResT r1 r) (IO m) 492 | execOpU = execOp @"unrolled" @"unrolled" 493 | 494 | print_k h = 495 | (\r -> Code [|| $$(runCode $ printFields h (fields (runIdentity r))) ||]) 496 | 497 | 498 | wrap :: Code (Handle -> IO ()) -> Code (IO ()) 499 | wrap (Code c) = 500 | Code [|| withFile "/dev/null" WriteMode $$c ||] 501 | 502 | -- r1 = Identity; r = Code 503 | runQuery :: Operator -> Q (TExp ( Res)) 504 | runQuery q = 505 | runCode $ 506 | wrap (Code [|| \h -> $$(runCode $ execOpU q (print_k (Code [|| h ||]))) ||]) 507 | 508 | -- r = Identity; r1 = Identity 509 | runQueryUnstaged :: Operator -> ( Res) 510 | runQueryUnstaged q = 511 | --traceShow q 512 | withFile "/dev/null" WriteMode (\handle -> 513 | runIdentity (execOpU q 514 | (\r -> Identity ( 515 | runIdentity (printFields (Identity handle) (fields (runIdentity r))))))) 516 | 517 | runQueryUnstagedC :: Operator -> Q (TExp ( Res)) 518 | runQueryUnstagedC q = [|| runQueryUnstaged q ||] 519 | 520 | -- Unrolled le, rec r 521 | runQueryUR :: Operator -> Q (TExp ( Res)) 522 | runQueryUR q = 523 | runCode $ 524 | wrap (Code [|| \h -> $$(runCode $ execOp @"unrolled" @"recursive" q (print_k (Code [|| h ||]))) ||]) 525 | 526 | -- Rec le, unrolled r 527 | runQueryRU :: Operator -> Q (TExp ( Res)) 528 | runQueryRU q = 529 | runCode $ 530 | wrap (_lam $ \h -> execOp @"recursive" @"unrolled" q (print_k h)) 531 | 532 | -- Rec le, rec r 533 | runQueryRR :: Operator -> Q (TExp ( Res)) 534 | runQueryRR q = 535 | runCode $ 536 | wrap (_lam $ \h -> execOp @"recursive" @"recursive" q (print_k h)) 537 | 538 | -- r1 = Code; r = Identity 539 | -- If r1 = Code then we can't use the unrolled versions as we don't know 540 | -- the schema. We can only unroll the loop. 541 | runQueryDynamic :: Operator -> Q (TExp ( Res)) 542 | runQueryDynamic q = 543 | runCode ( 544 | wrap (_lam $ \h -> execOp @"recursive" @"recursive" @Identity @Code q (printFields17 h))) 545 | 546 | queries = [ --("query", query) 547 | --, ("query2", query2) 548 | --("queryJoin", queryJoin) 549 | ("queryP", queryP) 550 | , ("queryLast", queryLast)] 551 | 552 | runners = [ ("best", runQuery) 553 | , ("ur", runQueryUR) 554 | , ("ru", runQueryRU) 555 | , ("rr", runQueryRR) 556 | , ("dynamic", runQueryDynamic) 557 | , ("unstaged", runQueryUnstagedC) 558 | ] 559 | 560 | files = [(n, c) 561 | | n <- [3..3], c <- [1..3] ] 562 | 563 | genBench :: Q (TExp [Benchmark]) 564 | genBench = 565 | spillQ $ for runners $ \(name, r) -> 566 | [|| bgroup name $$(spillQ $ for queries $ \(qname, q) -> 567 | [|| bgroup qname $ $$(spillQ $ for files $ \(n, c) -> 568 | [|| let b = $$(r (q c n)) in bench $$(runCode $ pure $ fp c n) $ perBatchEnv (const performGC) (\() -> b) ||]) ||]) ||] 569 | 570 | printFields17 :: (Code Handle) -> Code (Record Identity) -> Code (IO ()) 571 | printFields17 (Code h) (Code c) = 572 | Code [|| runIdentity $ printFields (Identity $$h) (fields $$(c)) ||] 573 | 574 | spill :: Record Code -> Code (Record Identity) 575 | spill (Record rs ss) = Code [|| Record $$(runCode $ spill2 rs) ss ||] 576 | 577 | spill2 :: [Code ByteString] -> Code [Identity ByteString] 578 | spill2 [] = Code [|| [] ||] 579 | spill2 (x:xs) = Code [|| (Identity $$(runCode x)) : $$(runCode $ spill2 xs) ||] 580 | 581 | 582 | runQueryUnstagedL :: Operator -> IO [Record Identity] 583 | runQueryUnstagedL q = runIdentity (execOpU @Identity @Identity q (return . return . return . runIdentity)) 584 | 585 | test :: IO () 586 | test = do 587 | -- processCSV "data/test.csv" (print . getField "name") 588 | --expr <- runQ $ unTypeQ $ runCode $ execOpU query (printFields . fields) 589 | -- expr <- runQ $ unTypeQ $ power 590 | --putStrLn $ pprint expr 591 | print "" 592 | 593 | 594 | bsToExp :: B.ByteString -> Q Exp 595 | bsToExp bs = do 596 | helper <- [| stringToBs |] 597 | let chars = BC.unpack . BC.tail . (BC.dropWhile (/= '\n')) $ bs 598 | return $! AppE helper $! LitE $! StringL chars 599 | 600 | stringToBs :: String -> B.ByteString 601 | stringToBs = BC.pack 602 | 603 | -------------------------------------------------------------------------------- /src/SimpleInterpreter.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module SimpleInterpreter where 3 | 4 | import qualified Data.ByteString as B 5 | import qualified Data.ByteString.Char8 as BC 6 | import Data.ByteString (ByteString) 7 | import Control.Monad 8 | import Data.List 9 | import Data.Maybe 10 | 11 | type Fields = [ByteString] 12 | type Schema = [ByteString] 13 | type Table = FilePath 14 | 15 | data Record = Record { fields :: Fields, schema :: Schema } deriving Show 16 | 17 | 18 | 19 | data Operator = Scan Table | Project Schema Schema Operator 20 | | Filter Predicate Operator | Join Operator Operator 21 | 22 | query :: Operator 23 | query = Project ["name"] ["name"] (Scan "data/test.csv") 24 | 25 | queryJoin :: Operator 26 | queryJoin = Join (Scan "data/test.csv") (Scan "data/test1.csv") 27 | 28 | 29 | data Predicate = Eq Ref Ref | Ne Ref Ref 30 | 31 | data Ref = Field ByteString | Value ByteString 32 | 33 | processCSV :: FilePath -> IO [Record] 34 | processCSV fp = do 35 | s <- B.readFile fp 36 | scanner s 37 | 38 | where 39 | scanner s = do 40 | let (fs, rs) = BC.span (/= '\n') s 41 | return $ rows (BC.split ',' fs) (BC.tail rs) 42 | 43 | rows :: Schema -> ByteString -> [Record] 44 | rows fs rs = 45 | case rs of 46 | "" -> [] 47 | _ -> 48 | let (r, rest) = BC.span (/= '\n') rs 49 | r' = (Record (BC.split ',' r) fs) 50 | in (r' : rows fs (BC.tail rest)) 51 | 52 | 53 | printFields :: Fields -> IO () 54 | printFields [] = putStr "\n" 55 | printFields [b] = BC.putStr b >> putStr "\n" 56 | printFields (b:bs) = BC.putStr b >> BC.putStr "," >> printFields bs 57 | 58 | evalPred :: Predicate -> Record -> Bool 59 | evalPred predicate rec = 60 | case predicate of 61 | Eq a b -> evalRef a rec == evalRef b rec 62 | Ne a b -> evalRef a rec /= evalRef b rec 63 | 64 | evalRef :: Ref -> Record -> ByteString 65 | evalRef (Value a) _ = a 66 | evalRef (Field name) r = getField name r 67 | 68 | getField :: ByteString -> Record -> ByteString 69 | getField field (Record fs sch) = 70 | let i = fromJust (elemIndex field sch) 71 | in fs !! i 72 | 73 | getFields :: [ByteString] -> Record -> [ByteString] 74 | getFields fs r = map (flip getField r) fs 75 | 76 | restrict :: [ByteString] -> [ByteString] -> Record -> Record 77 | restrict newSchema parentSchema r = 78 | Record (getFields parentSchema r) newSchema 79 | 80 | execOp :: Operator -> IO [Record] 81 | execOp op = 82 | case op of 83 | Scan filename -> processCSV filename 84 | --Print p -> execOp p (printFields . fields) 85 | Filter predicate parent -> do 86 | rs <- execOp parent 87 | return (filter (evalPred predicate) rs) 88 | Project newSchema parentSchema parent -> do 89 | rs <- execOp parent 90 | return $ map (restrict newSchema parentSchema) rs 91 | Join left right -> do 92 | ls <- execOp left 93 | rs <- execOp right 94 | return $ do 95 | rec <- ls 96 | rec' <- rs 97 | let keys = schema rec `intersect` schema rec' 98 | guard (getFields keys rec == getFields keys rec') 99 | (return (Record (fields rec ++ fields rec') 100 | (schema rec ++ schema rec'))) 101 | 102 | 103 | runQuery :: Operator -> IO () 104 | runQuery o = execOp o >>= print 105 | 106 | main :: IO () 107 | main = do 108 | -- processCSV "data/test.csv" (print . getField "name") 109 | execOp query >>= print 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /src/SimpleStagedInterpreter.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE TemplateHaskell #-} 3 | {-# LANGUAGE NoImplicitPrelude #-} 4 | {-# LANGUAGE OverloadedStrings #-} 5 | module SimpleStagedInterpreter where 6 | 7 | import qualified Data.ByteString as B 8 | import qualified Data.ByteString.Char8 as BC 9 | import Data.ByteString (ByteString) 10 | import Control.Monad 11 | import Data.List 12 | import Data.Maybe 13 | import Language.Haskell.TH 14 | import Language.Haskell.TH.Syntax 15 | import Prelude hiding (Applicative(..)) 16 | import Instances.TH.Lift () 17 | 18 | class Ops r where 19 | eq :: Eq a => r a -> r a -> r Bool 20 | neq :: Eq a => r a -> r a -> r Bool 21 | bc_split :: r Char -> r ByteString -> r [ByteString] 22 | 23 | instance Ops Code where 24 | eq (Code e1) (Code e2) = Code [|| $$e1 == $$e2 ||] 25 | neq (Code e1) (Code e2) = Code [|| $$e1 /= $$e2 ||] 26 | bc_split (Code e1) (Code e2) = Code [|| BC.split $$e1 $$e2 ||] 27 | 28 | newtype Code a = Code (Q (TExp a)) 29 | 30 | runCode :: Code a -> QTExp a 31 | runCode (Code a) = a 32 | 33 | 34 | pure :: Lift a => a -> Code a 35 | pure = Code . unsafeTExpCoerce . lift 36 | 37 | infixl 4 <*> 38 | (<*>) :: Code (a -> b) -> Code a -> Code b 39 | (Code f) <*> (Code a) = Code [|| $$f $$a ||] 40 | 41 | type Fields = [QTExp ByteString] 42 | type Schema = [ByteString] 43 | type Res = IO () 44 | type Table = FilePath 45 | 46 | data Record = Record { fields :: Fields, schema :: Schema } 47 | 48 | getField :: ByteString -> Record -> QTExp ByteString 49 | getField field (Record fs sch) = 50 | let i = fromJust (elemIndex field sch) 51 | in (fs !! i) 52 | 53 | getFields :: [ByteString] -> Record -> [QTExp ByteString] 54 | getFields fs r = map (flip getField r) fs 55 | 56 | 57 | data Operator = Scan FilePath Schema | Print Operator | Project Schema Schema Operator 58 | | Filter Predicate Operator | Join Operator Operator deriving Show 59 | 60 | query :: Operator 61 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" ["name", "age"])) 62 | 63 | query2 :: Operator 64 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" ["name", "age"])) 65 | 66 | queryJoin :: Operator 67 | queryJoin = Join (Scan "data/test.csv" ["name", "age"]) (Scan "data/test1.csv" ["name", "weight"]) 68 | 69 | 70 | data Predicate = Eq Ref Ref | Ne Ref Ref deriving Show 71 | 72 | data Ref = Field ByteString | Value ByteString deriving Show 73 | 74 | type QTExp a = Q (TExp a) 75 | 76 | fix :: (a -> a) -> a 77 | fix f = let x = f x in x 78 | 79 | parseRow' :: Schema -> QTExp ByteString -> QTExp ByteString 80 | parseRow' [] b = b 81 | parseRow' [_] b = [|| (BC.tail (BC.dropWhile (/= '\n') $$b)) ||] 82 | parseRow' (_:ss) b = parseRow' ss [|| (BC.tail (BC.dropWhile (/= ',') $$b)) ||] 83 | 84 | processCSV :: Schema -> FilePath -> QTExp (IO [Record]) 85 | processCSV ss fp = 86 | [|| do 87 | bs <- B.readFile fp 88 | return $ $$(rows ss) bs ||] 89 | where 90 | rows :: Schema -> QTExp (ByteString -> [Record]) 91 | rows sch = do 92 | [|| 93 | fix (\r rs -> 94 | case rs of 95 | "" -> [] 96 | _ -> 97 | $$(let (_, fs) = parseRow sch [||rs||] 98 | Code head_rec = pure (Record fs sch) 99 | in head_rec) : r $$(parseRow' sch [||rs||]) )||] 100 | 101 | 102 | parseRow :: Schema -> QTExp ByteString -> (QTExp ByteString, [QTExp ByteString]) 103 | parseRow [] b = (b, []) 104 | parseRow [_] b = 105 | ([|| let res = BC.dropWhile (/= '\n') $$b in BC.tail res ||] 106 | , [[|| BC.takeWhile (/= '\n') $$b ||]]) 107 | 108 | parseRow (_:ss') b = 109 | let new = [|| let res = BC.dropWhile (/= ',') $$b in BC.tail res ||] 110 | (final, rs) = parseRow ss' new 111 | in (final, [|| BC.takeWhile (/= ',') $$b ||] : rs) 112 | 113 | evalPred :: Predicate -> Record -> Bool 114 | evalPred predicate rec = 115 | case predicate of 116 | Eq a b -> (==) (evalRef a rec) (evalRef b rec) 117 | Ne a b -> (/=) (evalRef a rec) (evalRef b rec) 118 | 119 | evalRef :: Ref -> Record -> ByteString 120 | evalRef (Value a) _ = a 121 | evalRef (Field name) r = getField name r 122 | 123 | filterC :: (Record -> Code Bool) -> [Record] -> Code [Record] 124 | filterC f [] = Code [|| [] ||] 125 | filterC f (x:xs) = Code 126 | [|| if $$(runCode $ f x) then x : $$(runCode $ filterC f xs) else $$(runCode $ filterC f xs) ||] 127 | 128 | --filterC2 :: Lift a => (Code a -> Code Bool) -> Code [a] -> Code [a] 129 | --filterC2 f xs = Code [|| filter (\a -> $$(runCode $ f (Code [|| a ||]))) $$(runCode xs) ||] 130 | 131 | 132 | restrict :: Record -> Schema -> Schema -> Record 133 | restrict r newSchema parentSchema = 134 | Record (map (flip getField r) parentSchema) newSchema 135 | 136 | execOp :: Operator -> QTExp (IO [Record]) 137 | execOp op = 138 | case op of 139 | Scan filename sch -> processCSV sch filename 140 | --Print p -> execOp p (printFields . fields) 141 | Filter predicate parent -> [|| do 142 | rs <- $$(execOp parent) 143 | return $ filter (evalPred predicate) rs ||] 144 | {- 145 | Project newSchema parentSchema parent -> do 146 | rs <- execOp parent 147 | return $ map (restrict newSchema parentSchema) rs 148 | Join left right -> do 149 | ls <- execOp left 150 | rs <- execOp right 151 | return $ do 152 | rec <- ls 153 | rec' <- rs 154 | let keys = schema rec `intersect` schema rec' 155 | guard (getFields keys rec == getFields keys rec') 156 | (return (Record (fields rec ++ fields rec') 157 | (schema rec ++ schema rec'))) 158 | -} 159 | 160 | 161 | runQuery :: Operator -> IO () 162 | runQuery o = execOp o >>= print 163 | 164 | main :: IO () 165 | main = do 166 | -- processCSV "data/test.csv" (print . getField "name") 167 | execOp query >>= print 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /src/StreamInterpreter.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | {-# LANGUAGE RankNTypes #-} 4 | {-# LANGUAGE ExistentialQuantification #-} 5 | {-# LANGUAGE FlexibleInstances #-} 6 | {-# LANGUAGE BangPatterns #-} 7 | module StreamInterpreter where 8 | 9 | import qualified Data.ByteString.Streaming as B 10 | import qualified Data.ByteString.Streaming.Char8 as Q 11 | import qualified Data.ByteString.Char8 as BC 12 | import Data.ByteString (ByteString) 13 | import Control.Monad 14 | import Data.List 15 | import Data.Maybe 16 | import Streaming.Internal 17 | import qualified Streaming.Prelude as S 18 | import Streaming 19 | import Control.Monad.Trans.Resource 20 | import Debug.Trace 21 | import qualified Data.ByteString.Short as B 22 | (ShortByteString, toShort, fromShort) 23 | 24 | type Fields = [B.ShortByteString] 25 | type Schema = [B.ShortByteString] 26 | type Table = FilePath 27 | 28 | data Record = Record { fields :: !Fields, schema :: !Schema } 29 | 30 | data Operator = Scan Table | Project Schema Schema Operator 31 | | Filter Predicate Operator | Join Operator Operator 32 | 33 | query, query2 :: Operator 34 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" )) 35 | 36 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" )) 37 | 38 | queryJoin :: Operator 39 | queryJoin = Join (Scan "data/test.csv" ) (Scan "data/test1.csv" ) 40 | 41 | queryP :: Operator 42 | queryP = Project ["name"] ["name"] (Scan "data/test.csv" ) 43 | 44 | data Predicate = Eq Ref Ref | Ne Ref Ref 45 | 46 | data Ref = Field B.ShortByteString | Value B.ShortByteString 47 | 48 | newtype Scanner = Scanner (BS ()) 49 | 50 | type BS a = Q.ByteString ResIO a 51 | 52 | newScanner :: FilePath -> Scanner 53 | newScanner fp = Scanner (Q.readFile fp) 54 | 55 | nextLine :: Scanner -> Q.ByteString ResIO Scanner 56 | nextLine (Scanner bs) = 57 | Scanner <$> Q.span (/= '\n') bs 58 | 59 | hasNext :: Scanner -> ResIO (Of Bool Scanner) 60 | hasNext (Scanner bs) = fmap Scanner <$> Q.testNull bs 61 | 62 | data L m a = L [m a] 63 | 64 | processCSV :: forall m . Monoid m => FilePath -> (Record -> ResIO m) -> ResIO m 65 | processCSV fp yld = do 66 | scanner (newScanner fp) 67 | where 68 | scanner :: Scanner -> ResIO m 69 | scanner s = do 70 | fs :> rs <- Q.toStrict (nextLine s) 71 | let sch = map B.toShort (BC.split ',' fs) 72 | let f = rows sch rs 73 | destroy f (per_row sch) join (\_ -> pure mempty) -- (per_row sch yld) f 74 | 75 | per_row :: Schema -> Q.ByteString ResIO (ResIO m) -> ResIO m 76 | per_row fs s = 77 | let vs = S.toList $ mapped (fmap (first B.toShort) . Q.toStrict) $ Q.split ',' s 78 | in do 79 | a <- vs 80 | case a of 81 | ([""] :> m) -> m 82 | (bs :> m) -> yld (Record bs fs) <> m 83 | 84 | 85 | rows :: Schema -> Scanner -> Stream (Q.ByteString ResIO) ResIO () 86 | rows fs (Scanner s) = Q.lines s 87 | 88 | whenM :: Monoid m => Bool -> m -> m 89 | whenM b act = if b then act else mempty 90 | 91 | 92 | printFields :: Fields -> IO () 93 | printFields [] = putStr "\n" 94 | printFields [!b] = BC.putStr (B.fromShort b) >> putStr "\n" 95 | printFields (!b:bs) = BC.putStr (B.fromShort b) >> BC.putStr "," >> printFields bs 96 | 97 | printFields2 [] = return () 98 | printFields2 (!b:bs) = printFields2 bs 99 | 100 | evalPred :: Predicate -> Record -> Bool 101 | evalPred predicate rec = 102 | case predicate of 103 | Eq a b -> evalRef a rec == evalRef b rec 104 | Ne a b -> evalRef a rec /= evalRef b rec 105 | 106 | evalRef :: Ref -> Record -> B.ShortByteString 107 | evalRef (Value a) _ = a 108 | evalRef (Field name) r = getField name r 109 | 110 | getField :: B.ShortByteString -> Record -> B.ShortByteString 111 | getField field (Record fs sch) = 112 | let i = fromJust (elemIndex field sch) 113 | in fs !! i 114 | 115 | getFields :: [B.ShortByteString] -> Record -> [B.ShortByteString] 116 | getFields fs r = map (flip getField r) fs 117 | 118 | restrict :: Record -> [B.ShortByteString] -> [B.ShortByteString] -> Record 119 | restrict r newSchema parentSchema = 120 | Record (getFields parentSchema r) newSchema 121 | 122 | execOp :: Monoid m => Operator -> (Record -> ResIO m) -> ResIO m 123 | execOp op yld = 124 | case op of 125 | Scan filename -> processCSV filename yld 126 | Filter predicate parent -> execOp parent (\rec -> whenM (evalPred predicate rec) (yld rec)) 127 | Project newSchema parentSchema parent -> execOp parent (\rec -> yld (restrict rec newSchema parentSchema )) 128 | Join left right -> 129 | execOp left (\rec -> execOp right (\rec' -> 130 | let keys = schema rec `intersect` schema rec' 131 | in whenM (getFields keys rec == getFields keys rec') 132 | (yld (Record (fields rec ++ fields rec') 133 | (schema rec ++ schema rec'))))) 134 | 135 | 136 | runQuery :: Operator -> IO () 137 | runQuery o = runResourceT $ execOp o (liftIO . printFields . fields) 138 | 139 | runQueryL :: Operator -> IO [Record] 140 | runQueryL o = runResourceT $ execOp o (return . return) 141 | 142 | main :: IO () 143 | main = do 144 | runResourceT $ processCSV "data/test.csv" (liftIO . print . getField "name") 145 | runQuery query 146 | 147 | 148 | instance Semigroup m => Semigroup (ResourceT IO m) where 149 | (<>) a1 a2 = do 150 | (<>) <$> a1 <*> a2 151 | 152 | instance Monoid m => Monoid (ResourceT IO m) where 153 | mempty = pure mempty 154 | 155 | 156 | -------------------------------------------------------------------------------- /src/StreamInterpreter2.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE ScopedTypeVariables #-} 3 | {-# LANGUAGE RankNTypes #-} 4 | {-# LANGUAGE ExistentialQuantification #-} 5 | {-# LANGUAGE FlexibleInstances #-} 6 | {-# LANGUAGE BangPatterns #-} 7 | {-# LANGUAGE DeriveFunctor #-} 8 | module StreamInterpreter2 where 9 | 10 | -- Like stream interpreter but with a different yield function 11 | 12 | import qualified Data.ByteString.Streaming as B 13 | import qualified Data.ByteString.Streaming.Char8 as Q 14 | import qualified Data.ByteString.Char8 as BC 15 | import Data.ByteString (ByteString) 16 | import Control.Monad 17 | import Data.List 18 | import Data.Maybe 19 | import Streaming.Internal 20 | import qualified Streaming.Prelude as S 21 | import Streaming 22 | import Control.Monad.Trans.Resource 23 | import Debug.Trace 24 | import qualified Data.ByteString.Short as B 25 | (ShortByteString, toShort, fromShort) 26 | 27 | data OfBSG m a b = OfBS !a (Q.ByteString m b) deriving (Functor) 28 | 29 | type OfBS a b = OfBSG ResIO a b 30 | 31 | getBS :: OfBS a b -> Q.ByteString ResIO b 32 | getBS (OfBS a b) = b 33 | 34 | -- How to parse one field, and the result of parsing the rest of the file 35 | type Fields a = Stream (OfBSG ResIO B.ShortByteString) ResIO a 36 | type Schema = [B.ShortByteString] 37 | type Table = FilePath 38 | 39 | data Record a = Record { fields :: !(Fields a), schema :: !Schema } 40 | 41 | data Operator = Scan Table | Project Schema Schema Operator 42 | | Filter Predicate Operator | Join Operator Operator 43 | 44 | query, query2 :: Operator 45 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" )) 46 | 47 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" )) 48 | 49 | queryJoin :: Operator 50 | queryJoin = Join (Scan "data/test.csv" ) (Scan "data/test1.csv" ) 51 | 52 | queryP :: Operator 53 | queryP = Project ["name"] ["name"] (Scan "data/test.csv" ) 54 | 55 | queryF = (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" )) 56 | 57 | data Predicate = Eq Ref Ref | Ne Ref Ref 58 | 59 | data Ref = Field B.ShortByteString | Value B.ShortByteString 60 | 61 | newtype Scanner = Scanner (BS ()) 62 | 63 | type BS a = Q.ByteString ResIO a 64 | 65 | newScanner :: FilePath -> Scanner 66 | newScanner fp = Scanner (Q.readFile fp) 67 | 68 | nextLine :: Scanner -> Q.ByteString ResIO Scanner 69 | nextLine (Scanner bs) = 70 | Scanner <$> Q.span (/= '\n') bs 71 | 72 | hasNext :: Scanner -> ResIO (Of Bool Scanner) 73 | hasNext (Scanner bs) = fmap Scanner <$> Q.testNull bs 74 | 75 | 76 | data L m a = L [m a] 77 | 78 | processCSV :: forall m . Monoid m => FilePath -> (Record (ResIO m) -> ResIO m) -> ResIO m 79 | processCSV fp yld = do 80 | scanner (newScanner fp) 81 | where 82 | scanner :: Scanner -> ResIO m 83 | scanner s = do 84 | fs :> rs <- Q.toStrict (nextLine s) 85 | let sch = map B.toShort (BC.split ',' fs) 86 | let f = rows sch rs 87 | destroy f (per_row sch) join (\_ -> pure mempty) -- (per_row sch yld) f 88 | 89 | per_row :: Schema -> Q.ByteString ResIO (ResIO m) -> ResIO m 90 | per_row fs s = 91 | let vs = Q.split ',' s 92 | in yld $ Record (zipWithStream (OfBS) fs vs) fs 93 | 94 | 95 | rows :: Schema -> Scanner -> Stream (Q.ByteString ResIO) ResIO () 96 | rows fs (Scanner s) = Q.lines s 97 | 98 | zipWithStream 99 | :: (Monad m, Functor n) 100 | => (forall x . a -> Q.ByteString m x -> n x) 101 | -> [a] 102 | -> Stream (Q.ByteString m) m r 103 | -> Stream n m r 104 | zipWithStream op zs = loop zs 105 | where 106 | loop [] !ls = loop zs ls 107 | loop a@(x:xs) ls = case ls of 108 | Return r -> Return r 109 | Step fls -> Step $ fmap (loop xs) (op x fls) 110 | Effect mls -> Effect $ liftM (loop a) mls 111 | 112 | {-#INLINABLE zipWithStream #-} 113 | 114 | whenM :: (Monad n, Monoid m) => n Bool -> n m -> n m 115 | whenM ba act = ba >>= \b -> if b then act else return mempty 116 | 117 | 118 | printFields :: Fields (ResIO r) -> ResIO r 119 | printFields s = join (Q.putStrLn (Q.intercalate "," (maps getBS s))) 120 | {- 121 | printFields [] = putStr "\n" 122 | printFields [!b] = BC.putStr (B.fromShort b) >> putStr "\n" 123 | printFields (!b:bs) = BC.putStr (B.fromShort b) >> BC.putStr "," >> printFields bs 124 | -} 125 | 126 | printFields2 [] = return () 127 | printFields2 (!b:bs) = printFields2 bs 128 | 129 | evalPred :: Predicate -> Record (ResIO a) 130 | -> (Record (ResIO m) -> ResIO m) 131 | -> ResIO m 132 | evalPred predicate (Record d_fs sch) k = do 133 | let r = toListBS $ d_fs 134 | 135 | return _ 136 | -- k (Record s sch) 137 | 138 | where 139 | go (fs :> s) = _ 140 | pur_go fs = 141 | case predicate of 142 | Eq a b -> evalRef a fs sch == evalRef b fs sch 143 | Ne a b -> evalRef a fs sch /= evalRef b fs sch 144 | 145 | evalRef :: Ref -> [B.ShortByteString] -> [B.ShortByteString] -> B.ShortByteString 146 | evalRef (Value a) _ _ = a 147 | evalRef (Field name) fs sch = getField name fs sch 148 | 149 | -- Copy and read all the fields in a stream 150 | toListBS' :: forall a b . (Stream (OfBSG ResIO a) ResIO b) 151 | -> ResIO (Of [B.ShortByteString] (Stream (OfBSG ResIO a) ResIO b)) 152 | toListBS' s = destroy s _ _ _ 153 | where 154 | go :: OfBSG 155 | ResIO 156 | a 157 | (ResIO (Of [B.ShortByteString] (Stream (OfBSG ResIO a) ResIO b))) 158 | -> ResIO (Of [B.ShortByteString] (Stream (OfBSG ResIO a) ResIO b)) 159 | go (OfBS a b) = do 160 | let v = Q.toStrict (Q.copy b) 161 | -- bs :> k2 <- k 162 | return _ --((B.toShort b : bs) :> k2) 163 | 164 | 165 | -- Copy and read all the fields in a stream 166 | toListBS :: forall a b . (Stream (OfBSG ResIO a) ResIO b) 167 | -> Stream (OfBSG ResIO a) ResIO (Of [B.ShortByteString] b) 168 | toListBS s = destroy s go effect (\b -> return ([] :> b)) 169 | where 170 | go :: OfBSG 171 | ResIO a (Stream (OfBSG ResIO a) ResIO (Of [B.ShortByteString] b)) 172 | -> Stream (OfBSG ResIO a) ResIO (Of [B.ShortByteString] b) 173 | 174 | go (OfBS a b) = do 175 | let v = Q.toStrict (Q.copy b) 176 | g (bs :> s) = fmap (\(bss :> b) -> (B.toShort bs: bss) :> b) s 177 | wrap (OfBS a (fmap g v)) 178 | -- ls :> b' <- fs 179 | -- return ((B.toShort l : ls) :> b') 180 | 181 | getField :: B.ShortByteString -> [B.ShortByteString] 182 | -> [B.ShortByteString] -> B.ShortByteString 183 | getField field fs sch = 184 | let i = fromJust (elemIndex field sch) 185 | in fs !! i 186 | 187 | getFields :: [B.ShortByteString] -> Record a -> ResIO [B.ShortByteString] 188 | getFields fs (Record d_fs sch) = do 189 | fs <- toListBS d_fs 190 | return $ map (\f -> getField f fs sch) fs 191 | 192 | restrict :: Record a -> [B.ShortByteString] -> [B.ShortByteString] -> Record a 193 | restrict r newSchema parentSchema = 194 | Record (filterQ (`elem` parentSchema) (fields r)) newSchema 195 | 196 | -- | Skip elements of a stream that fail a predicate 197 | filterQ :: forall a r m . Monad m => (a -> Bool) -> Stream (OfBSG m a) m r -> Stream (OfBSG m a) m r 198 | filterQ thePred = loop where 199 | loop :: Monad m => Stream (OfBSG m a) m r -> Stream (OfBSG m a) m r 200 | loop str = case str of 201 | Return r -> Return r 202 | Effect m -> Effect (fmap loop m) 203 | Step (a `OfBS` as) -> if thePred a 204 | then Step (a `OfBS` (fmap loop as)) 205 | else Step (a `OfBS` lift (Q.effects (fmap loop as))) 206 | 207 | 208 | execOp :: Monoid m => Operator -> (Record (ResIO m) -> ResIO m) -> ResIO m 209 | execOp op yld = 210 | case op of 211 | Scan filename -> processCSV filename yld 212 | Filter predicate parent -> execOp parent (\rec -> (evalPred predicate rec) yld) 213 | Project newSchema parentSchema parent -> execOp parent (\rec -> yld (restrict rec newSchema parentSchema )) 214 | Join left right -> 215 | execOp left (\rec -> execOp right (\rec' -> 216 | let keys = schema rec `intersect` schema rec' 217 | in whenM ((==) <$> (getFields keys rec) <*> (getFields keys rec')) 218 | (yld (Record (fields rec <> fields rec') 219 | (schema rec ++ schema rec'))))) 220 | 221 | 222 | runQuery :: Operator -> IO () 223 | runQuery o = runResourceT $ execOp o (printFields . fields) 224 | 225 | {- 226 | runQueryL :: Operator -> IO [Record ()] 227 | runQueryL o = runResourceT $ execOp o _ 228 | -} 229 | 230 | main :: IO () 231 | main = do 232 | -- runResourceT $ processCSV "data/test.csv" (liftIO . print . getField "name") 233 | runQuery query 234 | 235 | 236 | instance Semigroup m => Semigroup (ResourceT IO m) where 237 | (<>) a1 a2 = do 238 | (<>) <$> a1 <*> a2 239 | 240 | instance Monoid m => Monoid (ResourceT IO m) where 241 | mempty = pure mempty 242 | 243 | 244 | -------------------------------------------------------------------------------- /src/StreamLMS.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE TemplateHaskell #-} 3 | {-# LANGUAGE NoImplicitPrelude #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | {-# LANGUAGE FlexibleInstances #-} 6 | {-# LANGUAGE Strict #-} 7 | {-# OPTIONS_GHC -Wno-orphans #-} 8 | module StreamLMS where 9 | 10 | import qualified Data.ByteString as B 11 | import qualified Data.ByteString.Char8 as BC 12 | import Data.ByteString (ByteString) 13 | import Control.Monad 14 | import Data.List 15 | import Data.Maybe 16 | import Language.Haskell.TH 17 | import Language.Haskell.TH.Syntax 18 | import Prelude hiding (Applicative(..)) 19 | import Instances.TH.Lift () 20 | --import Data.FileEmbed 21 | import Data.Functor.Identity 22 | import Control.Applicative (liftA2) 23 | import qualified Prelude as P 24 | import qualified Data.ByteString.Streaming.Char8 as Q 25 | import Control.Monad.Trans.Resource 26 | import System.IO 27 | import Debug.Trace 28 | import Data.Functor.Of 29 | 30 | class Ops r where 31 | eq :: Eq a => r a -> r a -> r Bool 32 | leq :: Eq a => r (IO a) -> r (IO a) -> r (IO Bool) 33 | lneq :: Eq a => r (IO a) -> r (IO a) -> r (IO Bool) 34 | neq :: Eq a => r a -> r a -> r Bool 35 | bc_split :: r Char -> r ByteString -> r [ByteString] 36 | tail_dropwhile :: Char -> r (BS ()) -> r (BS ()) 37 | take_while :: Char -> r (BS ()) -> r (BS ()) 38 | 39 | _case_of :: r (Of a b) -> r (a -> b -> c) -> r c 40 | 41 | _if :: r Bool -> r a -> r a -> r a 42 | _fix :: (r a -> r a) -> r a 43 | _lam :: (r a -> r b) -> r (a -> b) 44 | 45 | _bind :: Monad m => r (m a) -> (r (a -> m b)) -> r (m b) 46 | _print :: Show a => r a -> r Res 47 | _putStr :: r (BS ()) -> r (IO ()) 48 | (>>>) :: Monoid m => r m -> r m -> r m 49 | _empty :: Monoid m => r m 50 | _pure :: P.Applicative f => r a -> r (f a) 51 | 52 | -- Scanner interface 53 | _newScanner :: FilePath -> r (IO Scanner) 54 | _hasNext :: r Scanner -> r (IO (Of Bool Scanner)) 55 | _nextLine :: r Scanner -> (r (BS ()), r Scanner) 56 | 57 | liftBS :: r ByteString -> r (BS ()) 58 | runBS :: r (BS ()) -> r (IO ByteString) 59 | 60 | pure :: Lift a => a -> r a 61 | (<*>) :: r (a -> b) -> r a -> r b 62 | 63 | infixl 4 <*> 64 | 65 | newtype Code a = Code (Q (TExp a)) 66 | 67 | instance Ops Code where 68 | eq (Code e1) (Code e2) = Code [|| $$e1 == $$e2 ||] 69 | neq (Code e1) (Code e2) = Code [|| $$e1 /= $$e2 ||] 70 | leq (Code e1) (Code e2) = Code [|| (==) <$> $$e1 P.<*> $$e2 ||] 71 | lneq (Code e1) (Code e2) = Code [|| (/=) <$> $$e1 P.<*> $$e2 ||] 72 | bc_split (Code e1) (Code e2) = Code [|| BC.split $$e1 $$e2 ||] 73 | _if (Code a) (Code b) (Code c) = Code [|| if $$a then $$b else $$c ||] 74 | (>>>) (Code a) (Code b) = Code [|| $$a <> $$b ||] 75 | _bind (Code a) (Code b) = Code [|| $$a >>= $$b ||] 76 | _empty = Code [|| mempty ||] 77 | _pure (Code p) = Code [|| P.pure $$p ||] 78 | 79 | _print (Code a) = Code [|| print $$a ||] 80 | _putStr (Code a) = Code [|| Q.putStr $$a ||] 81 | 82 | _case_of (Code a) (Code b) = Code [|| case $$a of 83 | (v1 :> v2) -> $$b v1 v2 ||] 84 | 85 | tail_dropwhile c (Code b) = Code [|| Q.drop 1 (Q.dropWhile (/= c) $$b) ||] 86 | take_while c (Code b) = Code [|| Q.takeWhile (/= c) $$b ||] 87 | _fix f = Code [|| fix (\a -> $$(runCode $ f (Code [||a||]))) ||] 88 | 89 | _lam f = Code $ [|| \a -> $$(runCode $ f (Code [|| a ||])) ||] 90 | 91 | liftBS (Code b) = Code $ [|| Q.fromStrict $$(b) ||] 92 | runBS (Code b) = Code $ [|| Q.toStrict_ $$b ||] 93 | 94 | 95 | _newScanner fp = Code [|| newScanner fp ||] 96 | _hasNext s = Code [|| hasNext $$(runCode s) ||] 97 | _nextLine = _nextLineCode 98 | 99 | 100 | 101 | pure = Code . unsafeTExpCoerce . lift 102 | (Code f) <*> (Code a) = Code [|| $$f $$a ||] 103 | 104 | 105 | instance Ops Identity where 106 | eq = liftA2 (==) 107 | leq = liftA2 (liftA2 (==)) 108 | lneq = liftA2 (liftA2 (==)) 109 | 110 | neq = liftA2 (/=) 111 | bc_split = liftA2 (BC.split) 112 | tail_dropwhile c = fmap (Q.drop 1 . (Q.dropWhile (/= c))) 113 | take_while c = fmap (Q.takeWhile (/= c)) 114 | _print = fmap print 115 | _putStr = fmap (Q.putStr) 116 | _empty = Identity mempty 117 | _if (Identity b) (Identity c1) (Identity c2) = Identity (if b then c1 else c2) 118 | 119 | _fix = fix 120 | _lam f = Identity (\a -> runIdentity (f (Identity a))) 121 | (>>>) = liftA2 mappend 122 | _bind = liftA2 (>>=) 123 | _pure = fmap (P.pure) 124 | _newScanner fp = Identity (newScanner fp) 125 | _hasNext = fmap hasNext 126 | _nextLine = nextLine 127 | 128 | liftBS b = Q.fromStrict <$> b 129 | runBS b = Q.toStrict_ <$> b 130 | 131 | pure = Identity 132 | (<*>) (Identity a1) (Identity a2) = Identity (a1 a2) 133 | 134 | 135 | list_eq :: (Ops r, Eq a) => [r (IO a)] -> [r (IO a)] -> r (IO Bool) 136 | list_eq [] [] = _pure (pure True) 137 | list_eq (v:vs) (v1:v1s) = 138 | (leq v v1) `_bind` (_lam $ \b -> 139 | _if b (list_eq vs v1s) (_pure (pure False))) 140 | list_eq _ _ = _pure (pure False) 141 | 142 | _when :: (Monoid m, Ops r) => r (IO Bool) -> r (IO m) -> r (IO m) 143 | _when cond act = cond `_bind` (_lam $ \b -> _if b act _empty) 144 | 145 | _whenOf :: (Monoid m, Ops r) => r (IO (Of Bool t)) -> r (t -> IO m) -> r (IO m) 146 | _whenOf cond act = cond `_bind` (_lam $ \b 147 | -> _case_of b (_lam $ \b' -> _lam $ \t 148 | -> _if b' (act <*> t) _empty)) 149 | 150 | _whenA :: (P.Applicative f, Ops r) => r Bool -> r (f ()) -> r (f ()) 151 | _whenA cond act = _if cond act (_pure (pure ())) 152 | 153 | runCode :: Code a -> Q (TExp a) 154 | runCode (Code a) = a 155 | 156 | 157 | 158 | type Fields r = [r (BS ())] 159 | type Schema = [ByteString] 160 | type Table = FilePath 161 | 162 | type Res = IO () 163 | 164 | data Record r = Record { fields :: Fields r, schema :: Schema } 165 | 166 | getField :: ByteString -> Record r -> r (BS ()) 167 | getField field (Record fs sch) = 168 | let i = fromJust (elemIndex field sch) 169 | in (fs !! i) 170 | 171 | getFields :: [ByteString] -> Record r -> [r (BS ())] 172 | getFields fs r = map (flip getField r) fs 173 | 174 | 175 | data Operator = Scan FilePath Schema | Project Schema Schema Operator 176 | | Filter Predicate Operator | Join Operator Operator deriving Show 177 | 178 | query, query2 :: Operator 179 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" ["name", "age"])) 180 | 181 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" ["name", "age"])) 182 | 183 | queryJoin :: Operator 184 | queryJoin = Join (Scan "data/test.csv" ["name", "age"]) (Scan "data/test1.csv" ["name", "weight"]) 185 | 186 | queryP :: Operator 187 | queryP = Project ["name"] ["name"] (Scan "data/test.csv" ["name", "age"]) 188 | 189 | data Predicate = Eq Ref Ref | Ne Ref Ref deriving Show 190 | 191 | data Ref = Field ByteString | Value ByteString deriving Show 192 | 193 | type QTExp a = Code a 194 | 195 | fix :: (a -> a) -> a 196 | fix f = let x = f x in x 197 | 198 | data Scanner = Scanner (BS ()) 199 | 200 | type BS a = Q.ByteString IO a 201 | 202 | newScanner :: FilePath -> IO Scanner 203 | newScanner fp = 204 | openBinaryFile fp ReadMode >>= \h -> return (Scanner (Q.fromHandle h)) 205 | 206 | nextLine :: Identity Scanner -> (Identity (BS ()), Identity Scanner) 207 | nextLine (Identity (Scanner bs)) = 208 | (Identity $ Q.takeWhile (/= '\n') bs, 209 | Identity $ Scanner (Q.drop 1 (Q.dropWhile (/= '\n') bs))) 210 | 211 | -- As |span| is not stage aware, it is more dynamic an necessary. Splitting 212 | -- the implementation up means that we can skip over entire rows if 213 | -- necessary in the generated code. 214 | _nextLineCode :: Code Scanner -> (Code (BS ()), Code Scanner) 215 | _nextLineCode scanner = 216 | let fs = Code [|| let (Scanner s) = $$(runCode scanner) in Q.takeWhile (/= '\n') s ||] 217 | ts = Code [|| let (Scanner s) = $$(runCode scanner) in Scanner (Q.drop 1 (Q.dropWhile (/= '\n') s)) ||] 218 | in (fs, ts) 219 | 220 | 221 | 222 | hasNext :: Scanner -> IO (Of Bool Scanner) 223 | hasNext (Scanner bs) = (\(b :> s) -> (not b :> Scanner s)) <$> Q.testNull bs 224 | 225 | 226 | while :: 227 | (Ops r, Monoid m) => 228 | r (t -> IO (Of Bool t)) -> r ((t -> IO m) -> t -> IO m) -> r (t -> IO m) 229 | while k b = _fix (\r -> _lam $ \rs -> _whenOf (k <*> rs) (_lam $ \rs -> b <*> r <*> rs)) 230 | 231 | whenM :: Monoid m => Bool -> m -> m 232 | whenM b act = if b then act else mempty 233 | 234 | processCSV :: forall m r . (Monoid m, Ops r) => Schema -> FilePath -> (Record r -> r (IO m)) -> r (IO m) 235 | processCSV ss f yld = 236 | _newScanner f `_bind` rows ss 237 | where 238 | rows :: Schema -> r (Scanner -> (IO m)) 239 | rows sch = do 240 | while (_lam _hasNext) 241 | (_lam $ \r -> _lam $ \rs -> 242 | (let (hs, ts) = _nextLine rs 243 | in yld (Record (parseRow sch hs) sch) >>> (r <*> ts)) 244 | ) 245 | 246 | -- We can't use the standard |BC.split| function here because 247 | -- we we statically know how far we will unroll. The result is then 248 | -- more static as we can do things like drop certain fields if we 249 | -- perform a projection. 250 | parseRow :: Schema -> r (BS ()) -> [r (BS ())] 251 | parseRow [] _ = [] 252 | parseRow [_] b = 253 | [take_while '\n' b] 254 | parseRow (_:ss') b = 255 | let new = tail_dropwhile ',' b 256 | rs = parseRow ss' new 257 | in (take_while ',' b : rs) 258 | 259 | printFields :: Ops r => Fields r -> r (IO ()) 260 | printFields [] = _empty 261 | printFields [x] = _putStr x >>> _putStr (liftBS (pure "\n")) 262 | printFields (x:xs) = 263 | _putStr x >>> _putStr (liftBS $ pure ",") >>> printFields xs 264 | 265 | evalPred :: Ops r => Predicate -> Record r -> r (IO Bool) 266 | evalPred predicate rec = 267 | case predicate of 268 | Eq a b -> leq (evalRef a rec) (evalRef b rec) 269 | Ne a b -> lneq (evalRef a rec) (evalRef b rec) 270 | 271 | evalRef :: Ops r => Ref -> Record r -> r (IO ByteString) 272 | evalRef (Value a) _ = _pure (pure a) 273 | evalRef (Field name) r = runBS $ getField name r 274 | 275 | 276 | restrict :: Ops r => Record r -> Schema -> Schema -> Record r 277 | restrict r newSchema parentSchema = 278 | Record (map (flip getField r) parentSchema) newSchema 279 | 280 | 281 | execOp :: (Monoid m, Ops r) => Operator -> (Record r -> r (IO m)) -> r (IO m) 282 | execOp op yld = 283 | case op of 284 | Scan file sch -> 285 | processCSV sch file yld 286 | Filter predicate parent -> execOp parent 287 | (\rec -> _when (evalPred predicate rec) (yld rec) ) 288 | Project newSchema parentSchema parent -> 289 | execOp parent (\rec -> yld (restrict rec newSchema parentSchema )) 290 | Join left right -> 291 | execOp left (\rec -> execOp right (\rec' -> 292 | let keys = schema rec `intersect` schema rec' 293 | in _when (list_eq (map runBS $ getFields keys rec) (map runBS $ getFields keys rec')) 294 | (yld (Record (fields rec ++ fields rec') 295 | (schema rec ++ schema rec'))))) 296 | 297 | runQuery :: Operator -> Q (TExp Res) 298 | runQuery q = [|| $$(runCode $ execOp q (printFields . fields)) ||] 299 | 300 | runQueryL :: Operator -> Q (TExp (IO [Record Identity])) 301 | runQueryL q = [|| $$(runCode $ execOp q (\r -> Code [|| return (return $$(runCode $ spill r)) ||])) ||] 302 | 303 | spill :: Record Code -> Code (Record Identity) 304 | spill (Record rs ss) = Code [|| Record $$(runCode $ spill2 rs) ss ||] 305 | 306 | spill2 :: [Code a] -> Code [Identity a] 307 | spill2 [] = Code [|| [] ||] 308 | spill2 (x:xs) = Code [|| (Identity $$(runCode x)) : $$(runCode $ spill2 xs) ||] 309 | 310 | runQueryUnstaged :: Operator -> Res 311 | runQueryUnstaged q = runIdentity (execOp q (printFields . fields)) 312 | 313 | runQueryUnstagedL :: Operator -> IO [Record Identity] 314 | runQueryUnstagedL q = runIdentity (execOp q (return . return . return)) 315 | 316 | test :: IO () 317 | test = do 318 | -- processCSV "data/test.csv" (print . getField "name") 319 | expr <- runQ $ unTypeQ $ runCode $ execOp query (printFields . fields) 320 | -- expr <- runQ $ unTypeQ $ power 321 | putStrLn $ pprint expr 322 | 323 | 324 | bsToExp :: B.ByteString -> Q Exp 325 | bsToExp bs = do 326 | helper <- [| stringToBs |] 327 | let chars = BC.unpack . BC.tail . (BC.dropWhile (/= '\n')) $ bs 328 | return $! AppE helper $! LitE $! StringL chars 329 | 330 | stringToBs :: String -> B.ByteString 331 | stringToBs = BC.pack 332 | 333 | -------------------------------------------------------------------------------- /src/Test.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# OPTIONS_GHC -ddump-splices #-} 4 | module Test where 5 | 6 | --import qualified Compiler as C 7 | import qualified LMS as L 8 | --import qualified Interpreter as I 9 | --import qualified SimpleInterpreter as SI 10 | import qualified StreamLMS as S 11 | --import qualified StreamInterpreter as StI 12 | --import qualified StreamInterpreter2 as StI2 13 | import Weigh 14 | import GHC.Stats 15 | import System.Mem 16 | import System.IO 17 | import Data.ByteString (ByteString) 18 | import Control.Monad.Trans.Resource 19 | 20 | import qualified Data.ByteString.Streaming.Char8 as Q 21 | 22 | import Criterion 23 | import Criterion.Main 24 | import System.IO.Silently 25 | 26 | 27 | --csvQuery = L.Filter (L.Eq (L.Value "cricket") (L.Field "word")) csvTable 28 | 29 | --csvTable = L.Scan ["word", "year", "n1" "n2"] "1gram.csv" 30 | 31 | 32 | for = flip map 33 | 34 | main :: IO () 35 | main = do 36 | defaultMain $ 37 | $$(L.genBench) 38 | 39 | {- 40 | putStrLn "Simple Interpreter" 41 | SI.runQuery SI.query 42 | SI.runQuery SI.queryJoin 43 | 44 | 45 | putStrLn "Interpreter" 46 | I.runQuery I.query 47 | I.runQuery I.queryJoin 48 | 49 | putStrLn "Compiler" 50 | $$(C.runQuery C.queryProj) 51 | $$(C.runQueryL C.queryProj) 52 | $$(C.runQuery C.query) 53 | $$(C.runQuery C.query2) 54 | $$(C.runQuery C.queryJoin) 55 | -} 56 | putStrLn "LMS Compiler" 57 | {- 58 | mainWith ( do 59 | action "q1" $$(S.runQuery S.query) 60 | action "q2" $$(S.runQuery S.query2) 61 | action "q3" $$(S.runQuery S.queryJoin) 62 | action "q4" $$(S.runQuery S.queryP) 63 | ) 64 | -} 65 | {- 66 | defaultMain $ [bgroup "StI" [ 67 | bench "q" $ whnfIO (silence $ StI.runQuery StI.query) 68 | , bench "q2" $ whnfIO (silence $ StI.runQuery StI.query2) 69 | , bench "qj" $ whnfIO (silence $ StI.runQuery StI.queryJoin) 70 | , bench "q" $ whnfIO (silence $ StI.runQuery StI.queryP) 71 | ]] 72 | ++ [ bgroup "StI2" [ 73 | bench "q" $ whnfIO (silence $ StI.runQuery StI.query) 74 | , bench "q2" $ whnfIO (silence $ StI.runQuery StI.query2) 75 | , bench "qj" $ whnfIO (silence $ StI.runQuery StI.queryJoin) 76 | , bench "q" $ whnfIO (silence $ StI.runQuery StI.queryP) 77 | ]] 78 | ++ -} 79 | {- 80 | defaultMain $ 81 | [bgroup "LMS-C" [ 82 | bench "q" $ whnfIO (silence $$(S.runQuery S.query)) 83 | , bench "q2" $ whnfIO (silence $$(S.runQuery S.query2)) 84 | , bench "qj" $ whnfIO (silence $$(S.runQuery S.queryJoin)) 85 | , bench "qp" $ whnfIO (silence $$(S.runQuery S.queryP)) ]] 86 | -} 87 | 88 | {- 89 | $$(L.runQuery L.query) 90 | $$(L.runQuery L.query2) 91 | $$(L.runQuery L.queryJoin) 92 | $$(L.runQuery L.queryP) 93 | performGC 94 | s2 <- getRTSStats 95 | 96 | performGC 97 | s1 <- getRTSStats 98 | -} 99 | 100 | 101 | -- Observe that the number of max_live_bytes is much lower for the 102 | -- streaming version. However, the number of allocated bytes is the same 103 | -- for each example as we must read every character eventually. 104 | --print (max_live_bytes s1) 105 | --print (max_live_bytes s2) 106 | 107 | 108 | 109 | 110 | -- 111 | putStrLn "LMS Compiler Stream" 112 | {- 113 | putStrLn "LMS Interpreter" 114 | L.runQueryUnstaged L.query 115 | L.runQueryUnstaged L.query2 116 | L.runQueryUnstaged L.queryJoin 117 | performGC 118 | s3 <- getRTSStats 119 | print (max_live_bytes s3) 120 | -} 121 | --------------------------------------------------------------------------------