├── ChangeLog.md
├── LICENSE
├── Main.hs
├── README.md
├── Setup.hs
├── cabal.project
├── data
    ├── gen-data.hs
    ├── test.csv
    └── test1.csv
├── sql-haskell.cabal
└── src
    ├── Compiler.hs
    ├── Interpreter.hs
    ├── LMS.hs
    ├── SimpleInterpreter.hs
    ├── SimpleStagedInterpreter.hs
    ├── StreamInterpreter.hs
    ├── StreamInterpreter2.hs
    ├── StreamLMS.hs
    └── Test.hs


/ChangeLog.md:
--------------------------------------------------------------------------------
1 | # Revision history for sql-haskell
2 | 
3 | ## 0.1.0.0 -- YYYY-mm-dd
4 | 
5 | * First version. Released on an unsuspecting world.
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018, Matthew Pickering
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 | 
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 | 
16 |     * Neither the name of Matthew Pickering nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/Main.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 | 
3 | import qualified Test
4 | 
5 | main :: IO ()
6 | main = Test.main
7 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This repo contains a Haskell implementation of
 2 | 
 3 | > Rompf, Tiark, and Nada Amin. "Functional pearl: a SQL to C compiler in 500 lines of code." Acm Sigplan Notices 50.9 (2015): 2-9.
 4 | 
 5 | The paper describes a simple SQL query compiler which uses generative programming
 6 | techniques. The idea is that we first define a definitional interpreter, which
 7 | we then stage in order to produce a compiler for a specific known SQL query.
 8 | Further to this, the authors use the LMS framework in scala which makes staging
 9 | a program as easy as adding some type annotations. The `LMS` module explicitly
10 | implements this idea without any magic that LMS performs to make the transformation
11 | appear transparent.
12 | 
13 | There are 3 modules of interest:
14 | 
15 | 1 `Interpreter.hs` - A basic naive interpreter
16 | 2. `Compiler.hs` - A staged interpreter
17 | 3. `LMS.hs` - A definition which can either be specialised to a compiler or an
18 |               interpreter by providing a type annotation.
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/cabal.project:
--------------------------------------------------------------------------------
1 | profiling: true
2 | packages: .
3 | 


--------------------------------------------------------------------------------
/data/gen-data.hs:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cabal
 2 | {- cabal:
 3 | build-depends: base, random-strings, streaming-bytestring, bytestring, streaming, resourcet
 4 | -}
 5 | module Main where
 6 | 
 7 | import Control.Monad
 8 | import Data.List
 9 | import Test.RandomStrings
10 | import qualified Data.ByteString.Streaming.Char8 as Q
11 | import qualified Streaming.Prelude as S
12 | import Streaming
13 | import qualified Streaming.Internal as I
14 | import Control.Monad.Trans.Resource
15 | 
16 | c_limit = 2 ^ c_limit_k
17 | 
18 | c_limit_k = 4
19 | 
20 | file_line :: Int -> Q.ByteString ResIO ()
21 | file_line l =
22 |   Q.mwrap (liftIO $ Q.string . intercalate "," <$> randomStringsLen (randomString (onlyAlpha randomASCII)) (1, 5) l)
23 | 
24 | {-
25 | main :: IO ()
26 | main =
27 | 
28 |   forM_ [0..c_limit_k] $ \c ->
29 |     let cols = 10 ^ c
30 |         headers = ["f" ++ show n | n <- [0..cols] ]
31 |         header = intercalate "," headers
32 |     in forM_ [0..5] $ \i -> do
33 |           let n = 10 ^ i
34 |               fname = "data-" ++ show n ++ "-" ++ show cols ++ ".csv"
35 |           file_lines <- replicateM n (file_line cols)
36 |           let f = header : file_lines
37 |           writeFile fname (unlines f)
38 |           -}
39 | 
40 | main :: IO ()
41 | main = runResourceT $
42 | 
43 |   forM_ [0..c_limit_k] $ \c ->
44 |     let cols = 2 ^ c
45 |         header :: Q.ByteString ResIO ()
46 |         header = Q.pack $
47 |                   intercalates (S.yield ',') $
48 |                     S.subst (\n -> S.yield 'f'  >> S.each (show n)) (S.take cols $ S.enumFrom 0)
49 |     in forM_ [0..5] $ \i -> do
50 |           let n = 10 ^ i
51 |               fname = "data-" ++ show n ++ "-" ++ show cols ++ ".csv"
52 |           let file_lines = I.replicates n (file_line cols)
53 |           let f = do I.yields header >> file_lines
54 |           liftIO $ print fname
55 |           Q.writeFile fname (Q.unlines f)
56 | 
57 | 


--------------------------------------------------------------------------------
/data/test1.csv:
--------------------------------------------------------------------------------
1 | name,weight
2 | john,100
3 | alex,80
4 | tim,60
5 | 


--------------------------------------------------------------------------------
/sql-haskell.cabal:
--------------------------------------------------------------------------------
 1 | -- Initial sql-haskell.cabal generated by cabal init.  For further
 2 | -- documentation, see http://haskell.org/cabal/users-guide/
 3 | 
 4 | name:                sql-haskell
 5 | version:             0.1.0.0
 6 | -- synopsis:
 7 | -- description:
 8 | license:             BSD3
 9 | license-file:        LICENSE
10 | author:              Matthew Pickering
11 | maintainer:          matthewtpickering@gmail.com
12 | -- copyright:
13 | -- category:
14 | build-type:          Simple
15 | extra-source-files:  ChangeLog.md
16 | cabal-version:       >=1.10
17 | 
18 | library
19 |   exposed-modules: Test Compiler Interpreter LMS SimpleInterpreter StreamLMS StreamInterpreter
20 |   -- SimpleStagedInterpreter
21 |   -- other-modules:
22 |   -- other-extensions:
23 |   build-depends:       base >=4.10, scanner, bytestring, template-haskell
24 |                        , th-lift-instances, file-embed, dump-core, streaming-bytestring,
25 |                        resourcet, weigh, streaming, criterion, silently
26 |   hs-source-dirs:      src
27 |   ghc-options: -Wall -O2 -ddump-str-signatures
28 |   default-language:    Haskell2010
29 | 
30 | executable test
31 |   main-is: Main.hs
32 |   other-modules:
33 |   build-depends: sql-haskell, base >= 4.10
34 |   ghc-options: -O0 -rtsopts -threaded
35 | 


--------------------------------------------------------------------------------
/src/Compiler.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# LANGUAGE TemplateHaskell #-}
  3 | {-# LANGUAGE NoImplicitPrelude #-}
  4 | {-# LANGUAGE ScopedTypeVariables #-}
  5 | {-# LANGUAGE DeriveLift #-}
  6 | module Compiler where
  7 | 
  8 | import qualified Data.ByteString as B
  9 | import qualified Data.ByteString.Char8 as BC
 10 | import Data.ByteString (ByteString)
 11 | import Control.Monad
 12 | import Data.List
 13 | import Data.Maybe
 14 | import Language.Haskell.TH
 15 | import Language.Haskell.TH.Syntax
 16 | import Prelude hiding (Applicative(..), getLine, getChar)
 17 | import qualified Prelude as P
 18 | import Instances.TH.Lift ()
 19 | 
 20 | eq :: Eq a => Code a -> Code a -> Code Bool
 21 | eq (Code e1) (Code e2) = Code [|| $$e1 == $$e2 ||]
 22 | 
 23 | neq :: Eq a => Code a -> Code a -> Code Bool
 24 | neq (Code e1) (Code e2) = Code [|| $$e1 /= $$e2 ||]
 25 | 
 26 | newtype Code a = Code (Q (TExp a))
 27 | 
 28 | runCode :: Code a -> QTExp a
 29 | runCode (Code a) = a
 30 | 
 31 | pure :: Lift a => a -> Code a
 32 | pure = Code . unsafeTExpCoerce . lift
 33 | 
 34 | infixl 4 <*>
 35 | (<*>) :: Code (a -> b) -> Code a -> Code b
 36 | (Code f) <*> (Code a) = Code [|| $$f $$a ||]
 37 | 
 38 | type Fields = [QTExp ByteString]
 39 | type Schema = [ByteString]
 40 | type Res = IO ()
 41 | type Table = FilePath
 42 | 
 43 | data Record = Record { fields :: Fields, schema :: Schema }
 44 | 
 45 | data ResRecord = ResRecord { fields_r :: [ByteString], schema_r :: Schema }
 46 | 
 47 | getField :: ByteString -> Record -> QTExp ByteString
 48 | getField field (Record fs sch) =
 49 |   let i = fromJust (elemIndex field sch)
 50 |   in  (fs !! i)
 51 | 
 52 | getFields :: [ByteString] -> Record -> [QTExp ByteString]
 53 | getFields fs r = map (flip getField r) fs
 54 | 
 55 | data Operator = Scan FilePath Schema | Project Schema Schema Operator
 56 |               | Filter Predicate Operator | Join Operator Operator deriving Show
 57 | 
 58 | query :: Operator
 59 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" ["name", "age"]))
 60 | 
 61 | query2 :: Operator
 62 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" ["name", "age"]))
 63 | 
 64 | queryJoin :: Operator
 65 | queryJoin = Join (Scan "data/test.csv" ["name", "age"]) (Scan "data/test1.csv" ["name", "weight"])
 66 | 
 67 | queryProj :: Operator
 68 | queryProj = Project ["age"] ["age"] (Scan "data/test.csv" ["name", "age"])
 69 | 
 70 | data Predicate = Eq Ref Ref | Ne Ref Ref deriving Show
 71 | 
 72 | data Ref = Field ByteString | Value ByteString deriving Show
 73 | 
 74 | type QTExp a = Q (TExp a)
 75 | 
 76 | fix :: (a -> a) -> a
 77 | fix f = let x = f x in x
 78 | 
 79 | getLineS :: Scanner -> ByteString
 80 | getLineS = getLine . runScanner
 81 | 
 82 | getLine = getChar '\n'
 83 | {-# NOINLINE getLine #-}
 84 | 
 85 | getChar c s = BC.takeWhile (/= c) s
 86 | {-# NOINLINE getChar #-}
 87 | 
 88 | skipLine = skipChar '\n'
 89 | skipLineS = skipCharS '\n'
 90 | skipChar c s = BC.tail (BC.dropWhile (/= c) s)
 91 | skipCharS c s = Scanner (skipChar c (runScanner s))
 92 | {-# NOINLINE skipLine #-}
 93 | {-# NOINLINE skipChar #-}
 94 | {-# NOINLINE skipCharS #-}
 95 | {-# NOINLINE skipLineS #-}
 96 | 
 97 | data Scanner = Scanner ByteString
 98 | 
 99 | runScanner (Scanner b) = b
100 | 
101 | newScanner :: FilePath -> IO Scanner
102 | newScanner fp = Scanner <$>  B.readFile fp
103 | 
104 | nextLine :: Code Scanner -> (Code ByteString, Code Scanner)
105 | nextLine s =
106 |   let fs = Code [|| getLineS $$(runCode s) ||]
107 |       rs = Code [|| skipLineS $$(runCode s) ||]
108 |   in (fs, rs)
109 | 
110 | hasNext :: Scanner -> Bool
111 | hasNext (Scanner bs) = bs /= ""
112 | 
113 | while ::
114 |   Monoid m =>
115 |   QTExp (t -> Bool) -> QTExp ((t -> IO m) -> t -> IO m) -> QTExp (t -> IO m)
116 | while k b = [|| fix (\r rs -> whenM ($$k rs) ($$b r rs)) ||]
117 | 
118 | whenM :: Monoid m => Bool -> m -> m
119 | whenM b act = if b then act else mempty
120 | 
121 | processCSV :: forall m .Monoid m => Schema -> FilePath -> (Record -> QTExp (IO m)) -> QTExp (IO m)
122 | processCSV ss f yld =
123 |   [|| do
124 |         bs <- newScanner f
125 |         $$(rows ss) bs ||]
126 |   where
127 |     rows :: Schema -> QTExp (Scanner -> IO m)
128 |     rows sch = do
129 |       while [|| hasNext ||]
130 |             [|| \r rs -> do
131 |                   $$(let (hs, ts) = nextLine (Code [|| rs ||])
132 |                      in [|| $$(yld (Record (parseRow sch (runCode hs)) sch))
133 |                               >> r $$(runCode ts) ||] ) ||]
134 | 
135 |     -- We can't use the standard |BC.split| function here because
136 |     -- we we statically know how far we will unroll. The result is then
137 |     -- more static as we can do things like drop certain fields if we
138 |     -- perform a projection.
139 |     parseRow :: Schema -> QTExp ByteString -> [QTExp ByteString]
140 |     parseRow [] _ = []
141 |     parseRow [_] b =
142 |       [[|| skipLine $$b ||]]
143 |     parseRow (_:ss') b =
144 |       let new = [|| skipChar ',' $$b ||]
145 |           rs = parseRow ss' new
146 |       in ([|| skipChar ',' $$b ||] : rs)
147 | 
148 | 
149 | 
150 | printFields :: Fields -> QTExp Res
151 | printFields [] = [|| return () ||]
152 | printFields [x] = [|| BC.putStrLn $$x ||]
153 | printFields (x:xs) =
154 |   [||  B.putStr $$x >> BC.putStr "," >> $$(printFields xs) ||]
155 | 
156 | evalPred :: Predicate -> Record -> Code Bool
157 | evalPred predicate rec =
158 |   case predicate of
159 |     Eq a b -> eq (evalRef a rec) (evalRef b rec)
160 |     Ne a b -> neq (evalRef a rec) (evalRef b rec)
161 | 
162 | evalRef :: Ref -> Record -> Code ByteString
163 | evalRef (Value a) _ = Code [|| a ||]
164 | evalRef (Field name) r = Code (getField name r)
165 | 
166 | 
167 | restrict :: Record -> Schema -> Schema -> Record
168 | restrict r newSchema parentSchema =
169 |   Record (map (flip getField r) parentSchema) newSchema
170 | 
171 | -- We can unroll (==) if we statically know the length of each argument.
172 | _eq :: Eq a => [QTExp a] -> [QTExp a] -> QTExp Bool
173 | _eq [] [] = [|| True ||]
174 | _eq (v:vs) (v1:v1s) = [|| if $$v == $$v1 then $$(_eq vs v1s) else False ||]
175 | _eq _ _ = [|| False ||]
176 | 
177 | execOp :: Monoid m => Operator -> (Record -> QTExp (IO m)) -> QTExp (IO m)
178 | execOp op yld =
179 |   case op of
180 |     Scan file sch ->
181 |       processCSV sch file yld
182 |     Filter predicate parent -> execOp parent
183 |       (\rec -> [|| whenM $$(runCode $ evalPred predicate rec) $$(yld rec) ||] )
184 |     Project newSchema parentSchema parent ->
185 |       execOp parent (\rec -> yld (restrict rec newSchema parentSchema ))
186 |     Join left right ->
187 |       execOp left (\rec -> execOp right (\rec' ->
188 |         let keys = schema rec `intersect` schema rec'
189 |         in [|| whenM $$(_eq (getFields keys rec) (getFields keys rec'))
190 |                 ($$(yld (Record (fields rec ++ fields rec')
191 |                                 (schema rec ++ schema rec')))) ||] ))
192 | 
193 | runQuery :: Operator -> QTExp Res
194 | runQuery q = execOp q (printFields . fields)
195 | 
196 | inj :: ResRecord -> IO [ResRecord]
197 | inj = return . return
198 | 
199 | -- We still need to eliminate the binding time abstraction
200 | runQueryL :: Operator -> QTExp (IO [ResRecord])
201 | runQueryL o = execOp o (\r -> [|| inj  $$(spill r) ||])
202 | 
203 | spill :: Record -> QTExp ResRecord
204 | spill (Record rs ss) = [|| ResRecord $$(spill2 rs) ss ||]
205 | 
206 | spill2 :: [QTExp ByteString] -> QTExp [ByteString]
207 | spill2 [] = [|| [] ||]
208 | spill2 (x:xs) = [|| $$x : $$(spill2 xs) ||]
209 | 
210 | test :: IO ()
211 | test = do
212 | --  processCSV "data/test.csv" (print . getField "name")
213 |   expr <- runQ $ unTypeQ  $ execOp query (printFields . fields)
214 | --  expr <- runQ $ unTypeQ  $ power
215 |   putStrLn $ pprint expr
216 | 
217 | 
218 | 


--------------------------------------------------------------------------------
/src/Interpreter.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | module Interpreter where
  3 | 
  4 | import qualified Data.ByteString as B
  5 | import qualified Data.ByteString.Char8 as BC
  6 | import Data.ByteString (ByteString)
  7 | import Control.Monad
  8 | import Data.List
  9 | import Data.Maybe
 10 | 
 11 | type Fields = [ByteString]
 12 | type Schema = [ByteString]
 13 | type Table = FilePath
 14 | 
 15 | data Record = Record { fields :: Fields, schema :: Schema } deriving Show
 16 | 
 17 | data Operator = Scan Table | Project Schema Schema Operator
 18 |               | Filter Predicate Operator | Join Operator Operator
 19 | 
 20 | query :: Operator
 21 | query = Project ["name"] ["name"] (Scan "data/test.csv")
 22 | 
 23 | queryJoin :: Operator
 24 | queryJoin = Join (Scan "data/test.csv") (Scan "data/test1.csv")
 25 | 
 26 | 
 27 | data Predicate = Eq Ref Ref | Ne Ref Ref
 28 | 
 29 | data Ref = Field ByteString | Value ByteString
 30 | 
 31 | newtype Scanner = Scanner ByteString
 32 | 
 33 | newScanner :: FilePath -> IO Scanner
 34 | newScanner fp = Scanner <$> B.readFile fp
 35 | 
 36 | nextLine :: Scanner -> (ByteString, Scanner)
 37 | nextLine (Scanner bs) =
 38 |   let (fs, rs) = BC.span (/= '\n') bs
 39 |   in (fs, Scanner (BC.tail rs))
 40 | 
 41 | hasNext :: Scanner -> Bool
 42 | hasNext (Scanner bs) = bs /= ""
 43 | 
 44 | processCSV :: Monoid m => FilePath -> (Record -> IO m) -> IO m
 45 | processCSV fp yld = do
 46 |   s <- newScanner fp
 47 |   scanner s
 48 | 
 49 |   where
 50 |     scanner s = do
 51 |       let (fs, rs) = nextLine s
 52 |       rows (BC.split ',' fs) rs
 53 | 
 54 |     rows fs s =
 55 |       whenM (hasNext s)
 56 |         (let (r, rest) = nextLine s
 57 |           in yld (Record (BC.split ',' r) fs)
 58 |              <> rows fs rest)
 59 | 
 60 | whenM :: Monoid m => Bool -> m -> m
 61 | whenM b act = if b then act else mempty
 62 | 
 63 | 
 64 | printFields :: Fields -> IO ()
 65 | printFields [] = putStr "\n"
 66 | printFields [b] = BC.putStr b >> putStr "\n"
 67 | printFields (b:bs) = BC.putStr b >> BC.putStr "," >> printFields bs
 68 | 
 69 | evalPred :: Predicate -> Record -> Bool
 70 | evalPred  predicate rec =
 71 |   case predicate of
 72 |     Eq a b -> evalRef a rec == evalRef b rec
 73 |     Ne a b -> evalRef a rec /= evalRef b rec
 74 | 
 75 | evalRef :: Ref -> Record -> ByteString
 76 | evalRef (Value a) _ = a
 77 | evalRef (Field name) r = getField name r
 78 | 
 79 | getField :: ByteString -> Record -> ByteString
 80 | getField field (Record fs sch) =
 81 |   let i = fromJust (elemIndex field sch)
 82 |   in fs !! i
 83 | 
 84 | getFields :: [ByteString] -> Record -> [ByteString]
 85 | getFields fs r = map (flip getField r) fs
 86 | 
 87 | restrict :: Record -> [ByteString] -> [ByteString] -> Record
 88 | restrict r newSchema parentSchema =
 89 |   Record (getFields parentSchema r) newSchema
 90 | 
 91 | execOp :: Monoid m => Operator -> (Record -> IO m) -> IO m
 92 | execOp op yld =
 93 |   case op of
 94 |     Scan filename -> processCSV filename yld
 95 |     Filter predicate parent -> execOp parent (\rec -> whenM (evalPred predicate rec) (yld rec))
 96 |     Project newSchema parentSchema parent -> execOp parent (\rec -> yld (restrict rec newSchema parentSchema ))
 97 |     Join left right ->
 98 |       execOp left (\rec -> execOp right (\rec' ->
 99 |         let keys = schema rec `intersect` schema rec'
100 |         in whenM (getFields keys rec == getFields keys rec')
101 |             (yld (Record (fields rec ++ fields rec')
102 |                        (schema rec ++ schema rec')))))
103 | 
104 | 
105 | runQuery :: Operator -> IO ()
106 | runQuery o = execOp o (printFields . fields)
107 | 
108 | runQueryL :: Operator -> IO [Record]
109 | runQueryL o = execOp o (return . return)
110 | 
111 | main :: IO ()
112 | main = do
113 |   processCSV "data/test.csv" (print . getField "name")
114 |   runQuery query
115 | 
116 | 
117 | 
118 | 


--------------------------------------------------------------------------------
/src/LMS.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# LANGUAGE TemplateHaskell #-}
  3 | {-# LANGUAGE NoImplicitPrelude #-}
  4 | {-# LANGUAGE ScopedTypeVariables #-}
  5 | {-# LANGUAGE KindSignatures #-}
  6 | {-# LANGUAGE DataKinds #-}
  7 | {-# LANGUAGE MultiParamTypeClasses #-}
  8 | {-# LANGUAGE TypeApplications #-}
  9 | {-# LANGUAGE AllowAmbiguousTypes #-}
 10 | {-# LANGUAGE InstanceSigs #-}
 11 | {-# LANGUAGE FlexibleContexts #-}
 12 | {-# LANGUAGE FlexibleInstances #-}
 13 | {-# LANGUAGE GADTs #-}
 14 | {-# LANGUAGE TypeFamilies #-}
 15 | {-# LANGUAGE ConstraintKinds #-}
 16 | {-# LANGUAGE DeriveLift #-}
 17 | module LMS where
 18 | 
 19 | import qualified Data.ByteString as B
 20 | import qualified Data.ByteString.Char8 as BC
 21 | import Data.ByteString (ByteString)
 22 | import Control.Monad
 23 | import Data.List
 24 | import Data.Maybe
 25 | import Language.Haskell.TH
 26 | import Language.Haskell.TH.Syntax
 27 | import Prelude hiding (Applicative(..))
 28 | import Instances.TH.Lift ()
 29 | --import Data.FileEmbed
 30 | import Data.Functor.Identity
 31 | import Control.Applicative (liftA2)
 32 | import System.IO.Unsafe
 33 | import qualified Prelude as P
 34 | import GHC.TypeLits
 35 | import Control.Applicative (liftA3)
 36 | 
 37 | import Instances.TH.Lift
 38 | 
 39 | import Criterion
 40 | import Criterion.Main
 41 | import System.IO.Silently
 42 | import System.Mem
 43 | import Debug.Trace
 44 | import System.IO
 45 | 
 46 | for = flip map
 47 | 
 48 | 
 49 | class Ops r where
 50 |   eq :: Eq a => r a -> r a -> r Bool
 51 |   neq :: Eq a => r a -> r a -> r Bool
 52 |   bc_split :: r Char -> r ByteString -> r [ByteString]
 53 |   tail_dropwhile :: Char -> r ByteString -> r ByteString
 54 |   take_while :: Char -> r ByteString -> r ByteString
 55 | 
 56 |   _if :: r Bool -> r a -> r a -> r a
 57 |   _caseString :: r ByteString -> r a -> r a -> r a
 58 |   _fix :: (r a -> r a) -> r a
 59 |   _lam :: (r a -> r b) -> r (a -> b)
 60 | 
 61 |   _bind :: Monad m => r (m a) -> (r (a -> m b)) -> r (m b)
 62 |   _print :: Show a => r a -> r Res
 63 |   _putStr :: r ByteString -> r Res
 64 |   _hPutStr :: r Handle -> r ByteString -> r Res
 65 |   (>>>) :: Monoid m => r m -> r m -> r m
 66 |   _empty :: Monoid m => r m
 67 |   _pure :: P.Applicative f => r a -> r (f a)
 68 | 
 69 |   -- Scanner interface
 70 |   _newScanner :: FilePath -> r (IO Scanner)
 71 |   _hasNext :: r Scanner -> r Bool
 72 |   _nextLine :: r Scanner -> (r ByteString, r Scanner)
 73 | 
 74 |   pure :: Lift a => a -> r a
 75 |   (<*>) :: r (a -> b) -> r a -> r b
 76 | 
 77 |   _case_record :: r (Record r1)
 78 |                -> r ((Fields r1) -> Schema -> c)
 79 |                -> r c
 80 | 
 81 |   _lup :: r ByteString -> r (Fields r1) -> r Schema -> r (r1 ByteString)
 82 | 
 83 |   _intersect :: Eq a => r [a] -> r [a] -> r [a]
 84 |   _mkRecord :: r (Fields r1) -> r Schema -> r (Record r1)
 85 |   _cons :: r a -> r [a] -> r [a]
 86 | 
 87 | 
 88 | 
 89 | infixl 4 <*>
 90 | 
 91 | newtype Code a = Code (Q (TExp a))
 92 | 
 93 | instance Ops Code where
 94 |   eq (Code e1) (Code e2) = Code [|| $$e1 == $$e2 ||]
 95 |   neq (Code e1) (Code e2) = Code [|| $$e1 /= $$e2 ||]
 96 |   bc_split (Code e1) (Code e2) = Code [|| BC.split $$e1 $$e2 ||]
 97 |   _if (Code a) (Code b) (Code c) = Code [|| if $$a then $$b else $$c ||]
 98 |   _caseString (Code a) (Code b) (Code c) =
 99 |     Code [|| case $$a of
100 |                 "" -> $$b
101 |                 _  -> $$c ||]
102 |   (>>>) (Code a) (Code b) = Code [|| $$a <> $$b ||]
103 |   _bind (Code a) (Code b) = Code [|| $$a >>= $$b ||]
104 |   _empty = Code [|| mempty ||]
105 |   _pure (Code p) = Code [|| P.pure $$p ||]
106 | 
107 |   _print (Code a) = Code [|| print $$a ||]
108 |   _putStr (Code a) = Code [|| BC.putStr $$a ||]
109 |   _hPutStr (Code a) (Code b) = Code [|| BC.hPutStr $$a $$b ||]
110 | 
111 |   tail_dropwhile c (Code b) = Code [|| BC.tail (BC.dropWhile (/= c) $$b) ||]
112 |   take_while c (Code b) = Code [|| BC.takeWhile (/= c) $$b ||]
113 |   _fix f = Code [|| fix (\a -> $$(runCode $ f (Code [||a||]))) ||]
114 | 
115 |   _lam f = Code $ [|| \a ->  $$(runCode $ f (Code [|| a ||]))  ||]
116 | 
117 | 
118 |   _newScanner fp = Code [|| newScanner fp ||]
119 |   _hasNext s = Code [|| hasNext $$(runCode s) ||]
120 |   _nextLine = _nextLineCode
121 | 
122 | 
123 | 
124 |   pure = Code . unsafeTExpCoerce . lift
125 |   (Code f) <*> (Code a) = Code [|| $$f $$a ||]
126 | 
127 |   _case_record (Code r) (Code k1) =
128 |     Code [|| case $$r of
129 |               Record rs ss -> $$(k1) rs ss ||]
130 | 
131 |   _lup (Code l1) (Code l2) (Code l3) = Code [|| lup $$l1 $$l2 $$l3 ||]
132 | 
133 |   _intersect (Code l1) (Code l2) = Code [|| $$l1 `intersect` $$l2 ||]
134 | 
135 |   _mkRecord (Code l1) (Code l2) = Code [|| Record $$l1 $$l2 ||]
136 | 
137 |   _cons (Code l1) (Code l2) = Code [|| $$l1 : $$l2 ||]
138 | 
139 | 
140 | instance Ops Identity where
141 |   eq = liftA2 (==)
142 |   neq = liftA2 (/=)
143 |   bc_split = liftA2 (BC.split)
144 |   tail_dropwhile c = fmap (BC.tail . (BC.dropWhile (/= c)))
145 |   take_while c = fmap (BC.takeWhile (/= c))
146 |   _print = fmap print
147 |   _putStr = fmap (BC.putStr)
148 |   _hPutStr = liftA2 BC.hPut
149 |   _empty = Identity mempty
150 |   _if (Identity b) (Identity c1) (Identity c2) = Identity (if b then c1 else c2)
151 | 
152 |   _fix = fix
153 |   _lam f = Identity (\a -> runIdentity (f (Identity a)))
154 |   (>>>) = liftA2 mappend
155 |   _bind = liftA2 (>>=)
156 |   _pure = fmap (P.pure)
157 |   _newScanner fp = Identity (newScanner fp)
158 |   _hasNext = fmap hasNext
159 |   _nextLine = nextLine
160 | 
161 |   pure = Identity
162 |   (<*>) (Identity a1) (Identity a2) = Identity (a1 a2)
163 | 
164 |   _case_record (Identity r) (Identity k) =
165 |     case r of
166 |       Record rs ss -> Identity (k rs ss)
167 | 
168 |   _lup = liftA3 lup
169 | 
170 |   _intersect = liftA2 intersect
171 |   _mkRecord = liftA2 Record
172 | 
173 |   _cons = liftA2 (:)
174 | 
175 | 
176 | 
177 | _when :: (Monoid m, Ops r) => r Bool -> r m -> r m
178 | _when cond act = _if cond act _empty
179 | 
180 | _whenA :: (P.Applicative f, Ops r) => r Bool -> r (f ()) -> r (f ())
181 | _whenA cond act = _if cond act (_pure (pure ()))
182 | 
183 | runCode :: Code a -> Q (TExp a)
184 | runCode (Code a) = a
185 | 
186 | 
187 | 
188 | type Fields r = [r ByteString]
189 | type Schema = [ByteString]
190 | type Table = FilePath
191 | 
192 | type Res = IO ()
193 | 
194 | data Record r = Record { fields :: Fields r, schema :: Schema }
195 | 
196 | getField :: Ops r1 => r1 ByteString -> r1 (Record r) -> r1 (r ByteString)
197 | getField field r =
198 |   _case_record r (_lam $ \fs -> _lam $ \sch -> _lup field fs sch)
199 | 
200 | 
201 | lup :: ByteString -> Fields r -> Schema -> r ByteString
202 | lup field fs sch =
203 |   let i = fromJust (elemIndex field sch)
204 |   in  (fs !! i)
205 | 
206 | --getFields :: [ByteString] -> Record r -> [r ByteString]
207 | --getFields fs r = map (flip getField r) fs
208 | 
209 | 
210 | data Operator = Scan FilePath Schema | Project Schema Schema Operator
211 |               | Filter Predicate Operator | Join Operator Operator deriving (Lift, Show)
212 | 
213 | cols n = take (2 ^ n) [BC.pack('f':show n) | n <- [0..]]
214 | 
215 | fp c n = "data/data-" ++ show (10^n) ++ "-" ++ show (2 ^ c) ++ ".csv"
216 | 
217 | 
218 | queryP, queryJoin, queryLast, query, query2 :: Int -> Int -> Operator
219 | query c n  =
220 |   Project ["f0"] ["f0"] (Filter (Eq (Value "a") (Field "f0"))
221 |   (Scan (fp c n) (cols c)))
222 | 
223 | 
224 | query2 c n = Project ["f0"] ["f0"] (Filter (Eq (Value "a") (Field "f0"))
225 |               (Scan (fp c n) (cols c)))
226 | 
227 | queryJoin c n = Join (Scan (fp c n) (cols c)) (Scan (fp c n) (cols c))
228 | 
229 | queryP c n = Project ["f0"] ["f0"] (Scan (fp c n) (cols c))
230 | 
231 | queryLast c n =
232 |   let cs = cols c
233 |   in Project [last cs] [last cs] (Scan (fp c n) cs)
234 | 
235 | 
236 | data Predicate = Eq Ref Ref | Ne Ref Ref deriving (Show, Lift)
237 | 
238 | data Ref = Field ByteString | Value ByteString deriving (Show, Lift)
239 | 
240 | type QTExp a = Code a
241 | 
242 | fix :: (a -> a) -> a
243 | fix f = let x = f x in x
244 | 
245 | data Scanner = Scanner ByteString
246 | 
247 | newScanner :: FilePath -> IO Scanner
248 | newScanner fp = Scanner <$> B.readFile fp
249 | 
250 | nextLine :: Identity Scanner -> (Identity ByteString, Identity Scanner)
251 | nextLine (Identity (Scanner bs)) =
252 |   let (fs, rs) = BC.span (/= '\n') bs
253 |   in (Identity fs, Identity (Scanner (BC.tail rs)))
254 | 
255 | -- As |span| is not stage aware, it is more dynamic an necessary. Splitting
256 | -- the implementation up means that we can skip over entire rows if
257 | -- necessary in the generated code.
258 | _nextLineCode :: Code Scanner -> (Code ByteString, Code Scanner)
259 | _nextLineCode scanner =
260 |   let fs = Code [|| let (Scanner s) = $$(runCode scanner) in BC.takeWhile (/= '\n') s ||]
261 |       ts = Code [|| let (Scanner s) = $$(runCode scanner) in Scanner (BC.tail (BC.dropWhile (/= '\n') s)) ||]
262 |   in (fs, ts)
263 | 
264 | 
265 | 
266 | hasNext :: Scanner -> Bool
267 | hasNext (Scanner bs) = bs /= ""
268 | 
269 | while ::
270 |   (Ops r, Monoid m) =>
271 |   r (t -> Bool) -> r ((t -> IO m) -> t -> IO m) -> r (t -> IO m)
272 | while k b = _fix (\r -> _lam $ \rs -> _when (k <*> rs) (b <*> r <*> rs))
273 | 
274 | whenM :: Monoid m => Bool -> m -> m
275 | whenM b act = if b then act else mempty
276 | 
277 | type family ResT r1 r2 :: * -> * where
278 |   ResT Identity Code = Code
279 |   ResT Code Identity = Code
280 |   ResT Identity r = r
281 | 
282 | processCSV :: forall m r r1 . (Monoid m, O r1 r)
283 |            => Schema -> FilePath -> (r1 (Record r) -> (ResT r1 r) (IO m)) -> (ResT r1 r) (IO m)
284 | processCSV ss f yld =
285 |         _newScanner f `_bind` rows ss
286 |   where
287 |     rows :: Schema -> (ResT r1 r) (Scanner -> (IO m))
288 |     rows sch = do
289 |       while (_lam _hasNext)
290 |             (_lam $ \r -> _lam $ \rs ->
291 |               (let (hs, ts) = _nextLine rs
292 |                    rec = _mkRecord (parseRow sch hs) (pure sch)
293 |               in yld rec  >>> (r <*> ts))
294 |               )
295 | 
296 |     -- We can't use the standard |BC.split| function here because
297 |     -- we we statically know how far we will unroll. The result is then
298 |     -- more static as we can do things like drop certain fields if we
299 |     -- perform a projection.
300 |     parseRow :: Schema -> (ResT r1 r) ByteString -> r1 [r ByteString]
301 |     parseRow [] _ = _empty
302 |     parseRow [_] b =
303 |       weaken (take_while '\n' b) `_cons` _empty
304 |     parseRow (_:ss') b =
305 |       let new = tail_dropwhile ',' b
306 |           rs = parseRow ss' new
307 |       in weaken (take_while ',' b)  `_cons` rs
308 | 
309 | class Weaken r1 r where
310 |   weaken :: (ResT r1 r) a -> r1 (r a)
311 | 
312 | instance Weaken Code Identity where
313 |   weaken (Code c) = Code [|| Identity $$c ||]
314 | 
315 | instance Weaken Identity i where
316 |   weaken i = Identity i
317 | 
318 | 
319 | 
320 | printFields :: Ops r => r Handle -> Fields r -> r Res
321 | printFields _ [] = _empty
322 | printFields h [x] = _hPutStr h x >>> _hPutStr h (pure "\n")
323 | printFields h (x:xs) =
324 |   _hPutStr h x >>> _hPutStr h (pure ",") >>> printFields h xs
325 | 
326 | class Collapse r1 r2 where
327 |   c :: r1 (r2 a) -> (ResT r1 r2) a
328 | 
329 | instance Collapse Identity r where
330 |   c (Identity a) = a
331 | 
332 | instance Collapse Code Identity where
333 |   c (Code c) = Code [|| runIdentity $$c ||]
334 | 
335 | type O r1 r = (Collapse r1 r
336 |                           , Ops r
337 |                           , Ops (ResT r1 r)
338 |                           , Ops r1
339 |                           , Weaken r1 r)
340 | 
341 | evalPred :: forall r1 r . (O r1 r)
342 |          => Predicate -> r1 (Record r) -> (ResT r1 r) Bool
343 | evalPred  predicate rec =
344 |   case predicate of
345 |     Eq a b -> eq (evalRef a rec) (evalRef b rec)
346 |     Ne a b -> neq (evalRef a rec) (evalRef b rec)
347 | 
348 | evalRef :: (Collapse r1 r
349 |            , Ops r
350 |            , Ops (ResT r1 r)
351 |            , Ops r1 ) => Ref -> r1 (Record r) -> (ResT r1 r) ByteString
352 | evalRef (Value a) _ = pure a
353 | evalRef (Field name) r = c $ getField (pure name) r
354 | 
355 | 
356 | class O r1 r => ListEq (s :: Symbol) r r1 where
357 |   list_eq :: (Eq a) => r1 [r a] -> r1 [r a] -> (ResT r1 r) Bool
358 | 
359 | class O r1 r => Restrict (s :: Symbol) r r1 where
360 |   restrict :: Ops r => r1 (Record r) -> Schema -> Schema -> r1 (Record r)
361 | 
362 | 
363 | -- Point of this instance is that we can use the normal `map` in order to
364 | -- get the value of each field at
365 | instance Restrict "unrolled" Code Identity where
366 |   restrict r newSchema parentSchema
367 |     | length parentSchema <= 10 =
368 |     let ns = map Identity parentSchema
369 |         nfs = sequence $ map (flip getField r) ns
370 |     in Record <$> nfs <*> Identity newSchema
371 |   restrict r newSchema parentSchema =
372 |     restrict @"recursive" r newSchema parentSchema
373 | --    Record <$> (map (flip getField r) parentSchema) <*> _
374 | 
375 | instance Restrict s Identity Identity where
376 |   restrict rec newSchema parentSchema =
377 |     let ns = map Identity parentSchema
378 |         r = map (runIdentity . flip getField rec) ns
379 |     in Identity $ Record r parentSchema
380 | 
381 | -- In this instance we lost the static information about
382 | instance Restrict "recursive" Identity Code where
383 |   restrict :: Code (Record Identity) -> Schema -> Schema -> Code (Record Identity)
384 |   restrict rec news ps =
385 |     let ns = map pure ps
386 |         r = map (flip getField rec) ns
387 |     in Code [|| Record $$(runCode $ spillL r) ps ||]
388 | 
389 | -- In this instance we just do the same as the unrolled version because
390 | -- that's already recursive so err, that's good I guess.
391 | --
392 | -- Can't really think how to make this totally suck now because the problem
393 | -- is that we have to maintain the staticness of the schema so we can't
394 | -- really weaken.
395 | instance Restrict "recursive" Code Identity where
396 |   restrict :: Identity (Record Code) -> Schema -> Schema -> Identity (Record Code)
397 |   restrict (Identity rec) new ps =
398 |     let fs = fields rec
399 |         fs' = map (getField2 fs ps) (map pure ps)
400 |         getField' = getField2
401 |     in Identity (Record fs' new)
402 | 
403 | getField2 :: Fields Code -> Schema -> Code ByteString ->  Code ByteString
404 | getField2 fields ps lup_field =
405 |   Code [|| runIdentity $ lup $$(runCode lup_field) $$(runCode $ spill2 fields) ps ||]
406 | 
407 | 
408 | -- One of my favourite functions <3
409 | spillL :: [Code a] -> Code [a]
410 | spillL [] = Code [|| [] ||]
411 | spillL (x:xs) = Code [|| $$(runCode x) : $$(runCode $ spillL xs) ||]
412 | 
413 | spillQ :: [Q (TExp a)] -> Q (TExp [a])
414 | spillQ [] = [|| [] ||]
415 | spillQ (x:xs) = [|| $$x : $$(spillQ xs) ||]
416 | 
417 | instance ListEq "unrolled" Code Identity where
418 |   list_eq :: (Eq a) => Identity [Code a] -> Identity [Code a] -> Code Bool
419 |   list_eq (Identity xs) (Identity ys) =
420 |     let lxs = length xs
421 |         lys = length ys
422 |     in if max lxs lys <= 10
423 |          then list_eq_r xs ys
424 |          else list_eq @"recursive" (Identity xs) (Identity ys)
425 | 
426 | 
427 | list_eq_r [] [] = pure True
428 | list_eq_r (v:vs) (v1:v1s)
429 |   = _if (eq v v1) (list_eq_r vs v1s) (pure False)
430 | list_eq_r _ _ = pure False
431 | 
432 | instance ListEq s Identity Identity where
433 |   list_eq :: (Eq a) => Identity [Identity a] -> Identity [Identity a] -> Identity Bool
434 |   list_eq xs ys = Identity (xs == ys)
435 | 
436 | instance ListEq "recursive" Identity Code where
437 |   list_eq :: Eq a => Code [Identity a] -> Code [Identity a] -> Code Bool
438 |   list_eq (Code xs) (Code ys) = Code [|| $$(xs) == $$(ys) ||]
439 | 
440 | 
441 | pull :: [Code a] -> Code [a]
442 | pull [] = Code [|| [] ||]
443 | pull (x:xs) = Code [|| $$(runCode x) : $$(runCode $ pull xs) ||]
444 | 
445 | instance ListEq "recursive" Code Identity where
446 |   list_eq :: (Eq a) => Identity [Code a] -> Identity [Code a] -> Code Bool
447 |   list_eq (Identity xs) (Identity ys) = Code [|| $$(runCode $ pull xs) == $$(runCode $ pull ys) ||]
448 | 
449 | 
450 | -- if r = Code then r1 = Identity
451 | --
452 | -- if r = Identity then r1 = Code or Identity
453 | -- Having two type parameters means that we can either choose to use a
454 | -- continuation which statically knows the argument (r1 = Identity) or not.
455 | execOp :: forall restrict le r r1 m . (
456 |                                         Restrict restrict r r1
457 |                                       , ListEq le r r1
458 |                                       , Monoid m
459 |                                       , Ops r
460 |                                       , Ops r1)
461 |                                       => Operator
462 |                                       -> (r1 (Record r) -> (ResT r1 r) (IO m))
463 |                                       -> (ResT r1 r) (IO m)
464 | execOp op yld =
465 |   case op of
466 |     Scan file sch ->
467 |       processCSV sch file yld
468 |     Filter predicate parent -> execOp @restrict @le parent
469 |       (\rec -> _if (evalPred @r1 @r predicate rec) (yld rec) _empty )
470 |     Project newSchema parentSchema parent ->
471 |       execOp @restrict @le parent (\rec -> yld (restrict @restrict rec newSchema parentSchema ))
472 |     Join left right ->
473 |       execOp @restrict @le left (\rec -> execOp @restrict @le right (\rec' ->
474 |         let k1 = _schema rec
475 |             k2 = _fields rec
476 |             keys = _intersect (_schema rec) (_schema rec')
477 |             leq = list_eq @le @r @r1 @ByteString k2 (_fields rec')
478 |         in _when leq
479 |                (yld (_mkRecord (_fields rec >>> _fields rec')
480 |                                (_schema rec >>> _schema rec')
481 |                        ))))
482 | 
483 | _fields r = _case_record r (_lam $ \fs -> _lam $ \_ -> fs)
484 | _schema r = _case_record r (_lam $ \_  -> _lam $ \ss -> ss)
485 | 
486 | execOpU :: forall r r1 m . (Monoid m, O r r1
487 |                            , Restrict "unrolled" r r1
488 |                            , ListEq "unrolled" r r1)
489 |         => Operator
490 |         -> (r1 (Record r) -> (ResT r1 r) (IO m))
491 |         -> (ResT r1 r) (IO m)
492 | execOpU = execOp @"unrolled" @"unrolled"
493 | 
494 | print_k h =
495 |     (\r -> Code [|| $$(runCode $ printFields h (fields (runIdentity r))) ||])
496 | 
497 | 
498 | wrap :: Code (Handle -> IO ()) -> Code (IO ())
499 | wrap (Code c) =
500 |   Code [|| withFile "/dev/null" WriteMode $$c ||]
501 | 
502 | -- r1 = Identity; r = Code
503 | runQuery :: Operator -> Q (TExp ( Res))
504 | runQuery q =
505 |   runCode $
506 |     wrap (Code [|| \h -> $$(runCode $ execOpU q (print_k (Code [|| h ||]))) ||])
507 | 
508 | -- r = Identity; r1 = Identity
509 | runQueryUnstaged :: Operator -> ( Res)
510 | runQueryUnstaged q =
511 |   --traceShow q
512 |   withFile "/dev/null" WriteMode (\handle ->
513 |   runIdentity (execOpU q
514 |     (\r -> Identity (
515 |               runIdentity (printFields (Identity handle) (fields (runIdentity r)))))))
516 | 
517 | runQueryUnstagedC :: Operator -> Q (TExp ( Res))
518 | runQueryUnstagedC q = [|| runQueryUnstaged q ||]
519 | 
520 | -- Unrolled le, rec r
521 | runQueryUR :: Operator -> Q (TExp ( Res))
522 | runQueryUR q =
523 |   runCode $
524 |     wrap (Code [|| \h -> $$(runCode $ execOp @"unrolled" @"recursive" q (print_k (Code [|| h ||]))) ||])
525 | 
526 | -- Rec le, unrolled r
527 | runQueryRU :: Operator -> Q (TExp ( Res))
528 | runQueryRU q =
529 |   runCode $
530 |     wrap (_lam $ \h -> execOp @"recursive" @"unrolled" q (print_k h))
531 | 
532 | -- Rec le, rec r
533 | runQueryRR :: Operator -> Q (TExp ( Res))
534 | runQueryRR q =
535 |   runCode $
536 |     wrap (_lam $ \h -> execOp @"recursive" @"recursive" q (print_k h))
537 | 
538 | -- r1 = Code; r = Identity
539 | -- If r1 = Code then we can't use the unrolled versions as we don't know
540 | -- the schema. We can only unroll the loop.
541 | runQueryDynamic :: Operator -> Q (TExp ( Res))
542 | runQueryDynamic q =
543 |   runCode (
544 |     wrap (_lam $ \h -> execOp @"recursive" @"recursive" @Identity @Code q (printFields17 h)))
545 | 
546 | queries = [ --("query", query)
547 |           --, ("query2", query2)
548 |           --("queryJoin", queryJoin)
549 |           ("queryP", queryP)
550 |           , ("queryLast", queryLast)]
551 | 
552 | runners = [ ("best", runQuery)
553 |           , ("ur", runQueryUR)
554 |           , ("ru", runQueryRU)
555 |           , ("rr", runQueryRR)
556 |           , ("dynamic", runQueryDynamic)
557 |           , ("unstaged", runQueryUnstagedC)
558 |           ]
559 | 
560 | files = [(n, c)
561 |         | n <- [3..3], c <- [1..3] ]
562 | 
563 | genBench :: Q (TExp [Benchmark])
564 | genBench =
565 |   spillQ $ for runners $ \(name, r) ->
566 |     [|| bgroup name $$(spillQ $ for queries $ \(qname, q) ->
567 |         [|| bgroup qname $ $$(spillQ $ for files $ \(n, c) ->
568 |           [|| let b = $$(r (q c n)) in bench $$(runCode $ pure $ fp c n) $ perBatchEnv (const performGC) (\() -> b) ||]) ||]) ||]
569 | 
570 | printFields17 :: (Code Handle) -> Code (Record Identity) -> Code (IO ())
571 | printFields17 (Code h) (Code c) =
572 |       Code [|| runIdentity $ printFields (Identity $$h) (fields $$(c)) ||]
573 | 
574 | spill :: Record Code -> Code (Record Identity)
575 | spill (Record rs ss) = Code [|| Record $$(runCode $ spill2 rs) ss ||]
576 | 
577 | spill2 :: [Code ByteString] -> Code [Identity ByteString]
578 | spill2 [] = Code [|| [] ||]
579 | spill2 (x:xs) = Code [|| (Identity $$(runCode x)) : $$(runCode $ spill2 xs) ||]
580 | 
581 | 
582 | runQueryUnstagedL :: Operator -> IO [Record Identity]
583 | runQueryUnstagedL q = runIdentity (execOpU @Identity @Identity q (return . return . return . runIdentity))
584 | 
585 | test :: IO ()
586 | test = do
587 | --  processCSV "data/test.csv" (print . getField "name")
588 |   --expr <- runQ $ unTypeQ  $ runCode $ execOpU query (printFields . fields)
589 | --  expr <- runQ $ unTypeQ  $ power
590 |   --putStrLn $ pprint expr
591 |   print ""
592 | 
593 | 
594 | bsToExp :: B.ByteString -> Q Exp
595 | bsToExp bs = do
596 |     helper <- [| stringToBs |]
597 |     let chars = BC.unpack . BC.tail . (BC.dropWhile (/= '\n')) $ bs
598 |     return $! AppE helper $! LitE $! StringL chars
599 | 
600 | stringToBs :: String -> B.ByteString
601 | stringToBs = BC.pack
602 | 
603 | 


--------------------------------------------------------------------------------
/src/SimpleInterpreter.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | module SimpleInterpreter where
  3 | 
  4 | import qualified Data.ByteString as B
  5 | import qualified Data.ByteString.Char8 as BC
  6 | import Data.ByteString (ByteString)
  7 | import Control.Monad
  8 | import Data.List
  9 | import Data.Maybe
 10 | 
 11 | type Fields = [ByteString]
 12 | type Schema = [ByteString]
 13 | type Table = FilePath
 14 | 
 15 | data Record = Record { fields :: Fields, schema :: Schema } deriving Show
 16 | 
 17 | 
 18 | 
 19 | data Operator = Scan Table | Project Schema Schema Operator
 20 |               | Filter Predicate Operator | Join Operator Operator
 21 | 
 22 | query :: Operator
 23 | query = Project ["name"] ["name"] (Scan "data/test.csv")
 24 | 
 25 | queryJoin :: Operator
 26 | queryJoin = Join (Scan "data/test.csv") (Scan "data/test1.csv")
 27 | 
 28 | 
 29 | data Predicate = Eq Ref Ref | Ne Ref Ref
 30 | 
 31 | data Ref = Field ByteString | Value ByteString
 32 | 
 33 | processCSV :: FilePath -> IO [Record]
 34 | processCSV fp = do
 35 |   s <- B.readFile fp
 36 |   scanner s
 37 | 
 38 |   where
 39 |     scanner s = do
 40 |       let (fs, rs) = BC.span (/= '\n') s
 41 |       return $ rows (BC.split ',' fs) (BC.tail rs)
 42 | 
 43 |     rows :: Schema -> ByteString -> [Record]
 44 |     rows fs rs =
 45 |       case rs of
 46 |         "" -> []
 47 |         _ ->
 48 |           let (r, rest) = BC.span (/= '\n') rs
 49 |               r' = (Record (BC.split ',' r) fs)
 50 |           in (r' : rows fs (BC.tail rest))
 51 | 
 52 | 
 53 | printFields :: Fields -> IO ()
 54 | printFields [] = putStr "\n"
 55 | printFields [b] = BC.putStr b >> putStr "\n"
 56 | printFields (b:bs) = BC.putStr b >> BC.putStr "," >> printFields bs
 57 | 
 58 | evalPred :: Predicate -> Record -> Bool
 59 | evalPred  predicate rec =
 60 |   case predicate of
 61 |     Eq a b -> evalRef a rec == evalRef b rec
 62 |     Ne a b -> evalRef a rec /= evalRef b rec
 63 | 
 64 | evalRef :: Ref -> Record -> ByteString
 65 | evalRef (Value a) _ = a
 66 | evalRef (Field name) r = getField name r
 67 | 
 68 | getField :: ByteString -> Record -> ByteString
 69 | getField field (Record fs sch) =
 70 |   let i = fromJust (elemIndex field sch)
 71 |   in fs !! i
 72 | 
 73 | getFields :: [ByteString] -> Record -> [ByteString]
 74 | getFields fs r = map (flip getField r) fs
 75 | 
 76 | restrict :: [ByteString] -> [ByteString] -> Record -> Record
 77 | restrict newSchema parentSchema r =
 78 |   Record (getFields parentSchema r) newSchema
 79 | 
 80 | execOp :: Operator -> IO [Record]
 81 | execOp op  =
 82 |   case op of
 83 |     Scan filename -> processCSV filename
 84 |     --Print p       -> execOp p (printFields . fields)
 85 |     Filter predicate parent -> do
 86 |       rs <- execOp parent
 87 |       return (filter (evalPred predicate) rs)
 88 |     Project newSchema parentSchema parent -> do
 89 |       rs <- execOp parent
 90 |       return $ map (restrict newSchema parentSchema) rs
 91 |     Join left right -> do
 92 |       ls <- execOp left
 93 |       rs <- execOp right
 94 |       return $ do
 95 |         rec <- ls
 96 |         rec' <- rs
 97 |         let keys = schema rec `intersect` schema rec'
 98 |         guard (getFields keys rec == getFields keys rec')
 99 |         (return (Record (fields rec ++ fields rec')
100 |                        (schema rec ++ schema rec')))
101 | 
102 | 
103 | runQuery :: Operator -> IO ()
104 | runQuery o = execOp o >>= print
105 | 
106 | main :: IO ()
107 | main = do
108 | --  processCSV "data/test.csv" (print . getField "name")
109 |   execOp query >>= print
110 | 
111 | 
112 | 
113 | 


--------------------------------------------------------------------------------
/src/SimpleStagedInterpreter.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# LANGUAGE TemplateHaskell #-}
  3 | {-# LANGUAGE NoImplicitPrelude #-}
  4 | {-# LANGUAGE OverloadedStrings #-}
  5 | module SimpleStagedInterpreter where
  6 | 
  7 | import qualified Data.ByteString as B
  8 | import qualified Data.ByteString.Char8 as BC
  9 | import Data.ByteString (ByteString)
 10 | import Control.Monad
 11 | import Data.List
 12 | import Data.Maybe
 13 | import Language.Haskell.TH
 14 | import Language.Haskell.TH.Syntax
 15 | import Prelude hiding (Applicative(..))
 16 | import Instances.TH.Lift ()
 17 | 
 18 | class Ops r where
 19 |   eq :: Eq a => r a -> r a -> r Bool
 20 |   neq :: Eq a => r a -> r a -> r Bool
 21 |   bc_split :: r Char -> r ByteString -> r [ByteString]
 22 | 
 23 | instance Ops Code where
 24 |   eq (Code e1) (Code e2) = Code [|| $$e1 == $$e2 ||]
 25 |   neq (Code e1) (Code e2) = Code [|| $$e1 /= $$e2 ||]
 26 |   bc_split (Code e1) (Code e2) = Code [|| BC.split $$e1 $$e2 ||]
 27 | 
 28 | newtype Code a = Code (Q (TExp a))
 29 | 
 30 | runCode :: Code a -> QTExp a
 31 | runCode (Code a) = a
 32 | 
 33 | 
 34 | pure :: Lift a => a -> Code a
 35 | pure = Code . unsafeTExpCoerce . lift
 36 | 
 37 | infixl 4 <*>
 38 | (<*>) :: Code (a -> b) -> Code a -> Code b
 39 | (Code f) <*> (Code a) = Code [|| $$f $$a ||]
 40 | 
 41 | type Fields = [QTExp ByteString]
 42 | type Schema = [ByteString]
 43 | type Res = IO ()
 44 | type Table = FilePath
 45 | 
 46 | data Record = Record { fields :: Fields, schema :: Schema }
 47 | 
 48 | getField :: ByteString -> Record -> QTExp ByteString
 49 | getField field (Record fs sch) =
 50 |   let i = fromJust (elemIndex field sch)
 51 |   in  (fs !! i)
 52 | 
 53 | getFields :: [ByteString] -> Record -> [QTExp ByteString]
 54 | getFields fs r = map (flip getField r) fs
 55 | 
 56 | 
 57 | data Operator = Scan FilePath Schema | Print Operator | Project Schema Schema Operator
 58 |               | Filter Predicate Operator | Join Operator Operator deriving Show
 59 | 
 60 | query :: Operator
 61 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" ["name", "age"]))
 62 | 
 63 | query2 :: Operator
 64 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" ["name", "age"]))
 65 | 
 66 | queryJoin :: Operator
 67 | queryJoin = Join (Scan "data/test.csv" ["name", "age"]) (Scan "data/test1.csv" ["name", "weight"])
 68 | 
 69 | 
 70 | data Predicate = Eq Ref Ref | Ne Ref Ref deriving Show
 71 | 
 72 | data Ref = Field ByteString | Value ByteString deriving Show
 73 | 
 74 | type QTExp a = Q (TExp a)
 75 | 
 76 | fix :: (a -> a) -> a
 77 | fix f = let x = f x in x
 78 | 
 79 | parseRow' :: Schema -> QTExp ByteString -> QTExp ByteString
 80 | parseRow' [] b = b
 81 | parseRow' [_] b = [|| (BC.tail (BC.dropWhile (/= '\n') $$b)) ||]
 82 | parseRow' (_:ss) b = parseRow' ss [||  (BC.tail (BC.dropWhile (/= ',') $$b)) ||]
 83 | 
 84 | processCSV :: Schema -> FilePath -> QTExp (IO [Record])
 85 | processCSV ss fp =
 86 |   [|| do
 87 |         bs <- B.readFile fp
 88 |         return $ $$(rows ss) bs ||]
 89 |   where
 90 |     rows :: Schema -> QTExp (ByteString -> [Record])
 91 |     rows sch = do
 92 |       [||
 93 |         fix (\r rs ->
 94 |           case rs of
 95 |             "" -> []
 96 |             _ ->
 97 |               $$(let (_, fs) = parseRow sch [||rs||]
 98 |                      Code head_rec = pure (Record fs sch)
 99 |                  in head_rec) : r $$(parseRow' sch [||rs||]) )||]
100 | 
101 | 
102 |     parseRow :: Schema -> QTExp ByteString -> (QTExp ByteString, [QTExp ByteString])
103 |     parseRow [] b = (b, [])
104 |     parseRow [_] b =
105 |       ([|| let res = BC.dropWhile (/= '\n') $$b in BC.tail res ||]
106 |       , [[|| BC.takeWhile (/= '\n') $$b ||]])
107 | 
108 |     parseRow (_:ss') b =
109 |       let new = [|| let res = BC.dropWhile (/= ',') $$b in BC.tail res ||]
110 |           (final, rs) = parseRow ss' new
111 |       in (final, [|| BC.takeWhile (/= ',') $$b ||] : rs)
112 | 
113 | evalPred :: Predicate -> Record -> Bool
114 | evalPred predicate rec =
115 |   case predicate of
116 |     Eq a b -> (==) (evalRef a rec) (evalRef b rec)
117 |     Ne a b -> (/=) (evalRef a rec) (evalRef b rec)
118 | 
119 | evalRef :: Ref -> Record -> ByteString
120 | evalRef (Value a) _ = a
121 | evalRef (Field name) r = getField name r
122 | 
123 | filterC :: (Record -> Code Bool) -> [Record] -> Code [Record]
124 | filterC f [] = Code [|| [] ||]
125 | filterC f (x:xs) = Code
126 |   [|| if $$(runCode $ f x) then x : $$(runCode $ filterC f xs) else $$(runCode $ filterC f xs) ||]
127 | 
128 | --filterC2 :: Lift a => (Code a -> Code Bool) -> Code [a] -> Code [a]
129 | --filterC2 f xs = Code [|| filter (\a -> $$(runCode $ f (Code [|| a ||]))) $$(runCode xs) ||]
130 | 
131 | 
132 | restrict :: Record -> Schema -> Schema -> Record
133 | restrict r newSchema parentSchema =
134 |   Record (map (flip getField r) parentSchema) newSchema
135 | 
136 | execOp :: Operator -> QTExp (IO [Record])
137 | execOp op  =
138 |   case op of
139 |     Scan filename sch -> processCSV sch filename
140 |     --Print p       -> execOp p (printFields . fields)
141 |     Filter predicate parent -> [|| do
142 |       rs <- $$(execOp parent)
143 |       return $ filter (evalPred predicate) rs ||]
144 |     {-
145 |     Project newSchema parentSchema parent -> do
146 |       rs <- execOp parent
147 |       return $ map (restrict newSchema parentSchema) rs
148 |     Join left right -> do
149 |       ls <- execOp left
150 |       rs <- execOp right
151 |       return $ do
152 |         rec <- ls
153 |         rec' <- rs
154 |         let keys = schema rec `intersect` schema rec'
155 |         guard (getFields keys rec == getFields keys rec')
156 |         (return (Record (fields rec ++ fields rec')
157 |                        (schema rec ++ schema rec')))
158 |                        -}
159 | 
160 | 
161 | runQuery :: Operator -> IO ()
162 | runQuery o = execOp o >>= print
163 | 
164 | main :: IO ()
165 | main = do
166 | --  processCSV "data/test.csv" (print . getField "name")
167 |   execOp query >>= print
168 | 
169 | 
170 | 
171 | 


--------------------------------------------------------------------------------
/src/StreamInterpreter.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# LANGUAGE ScopedTypeVariables #-}
  3 | {-# LANGUAGE RankNTypes #-}
  4 | {-# LANGUAGE ExistentialQuantification #-}
  5 | {-# LANGUAGE FlexibleInstances #-}
  6 | {-# LANGUAGE BangPatterns #-}
  7 | module StreamInterpreter where
  8 | 
  9 | import qualified Data.ByteString.Streaming as B
 10 | import qualified Data.ByteString.Streaming.Char8 as Q
 11 | import qualified Data.ByteString.Char8 as BC
 12 | import Data.ByteString (ByteString)
 13 | import Control.Monad
 14 | import Data.List
 15 | import Data.Maybe
 16 | import Streaming.Internal
 17 | import qualified Streaming.Prelude as S
 18 | import Streaming
 19 | import Control.Monad.Trans.Resource
 20 | import Debug.Trace
 21 | import qualified Data.ByteString.Short as B
 22 |          (ShortByteString, toShort, fromShort)
 23 | 
 24 | type Fields = [B.ShortByteString]
 25 | type Schema = [B.ShortByteString]
 26 | type Table = FilePath
 27 | 
 28 | data Record = Record { fields :: !Fields, schema :: !Schema }
 29 | 
 30 | data Operator = Scan Table | Project Schema Schema Operator
 31 |               | Filter Predicate Operator | Join Operator Operator
 32 | 
 33 | query, query2 :: Operator
 34 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" ))
 35 | 
 36 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" ))
 37 | 
 38 | queryJoin :: Operator
 39 | queryJoin = Join (Scan "data/test.csv" ) (Scan "data/test1.csv" )
 40 | 
 41 | queryP :: Operator
 42 | queryP = Project ["name"] ["name"] (Scan "data/test.csv" )
 43 | 
 44 | data Predicate = Eq Ref Ref | Ne Ref Ref
 45 | 
 46 | data Ref = Field B.ShortByteString | Value B.ShortByteString
 47 | 
 48 | newtype Scanner = Scanner (BS ())
 49 | 
 50 | type BS a = Q.ByteString ResIO a
 51 | 
 52 | newScanner :: FilePath -> Scanner
 53 | newScanner fp = Scanner (Q.readFile fp)
 54 | 
 55 | nextLine :: Scanner -> Q.ByteString ResIO Scanner
 56 | nextLine (Scanner bs) =
 57 |   Scanner <$> Q.span (/= '\n') bs
 58 | 
 59 | hasNext :: Scanner -> ResIO (Of Bool Scanner)
 60 | hasNext (Scanner bs) = fmap Scanner <$> Q.testNull bs
 61 | 
 62 | data L m a = L [m a]
 63 | 
 64 | processCSV :: forall m . Monoid m => FilePath -> (Record -> ResIO m) -> ResIO m
 65 | processCSV fp yld = do
 66 |   scanner (newScanner fp)
 67 |   where
 68 |     scanner :: Scanner -> ResIO m
 69 |     scanner s = do
 70 |       fs :> rs <- Q.toStrict (nextLine s)
 71 |       let sch = map B.toShort (BC.split ',' fs)
 72 |       let f = rows sch rs
 73 |       destroy f (per_row sch) join (\_ -> pure mempty) -- (per_row sch yld) f
 74 | 
 75 |     per_row :: Schema -> Q.ByteString ResIO (ResIO m) -> ResIO m
 76 |     per_row fs s =
 77 |       let vs = S.toList $ mapped (fmap (first B.toShort) . Q.toStrict) $ Q.split ',' s
 78 |       in do
 79 |           a <- vs
 80 |           case a of
 81 |             ([""] :> m) -> m
 82 |             (bs :> m) -> yld (Record bs fs) <> m
 83 | 
 84 | 
 85 |     rows :: Schema -> Scanner -> Stream (Q.ByteString ResIO) ResIO ()
 86 |     rows fs (Scanner s) = Q.lines s
 87 | 
 88 | whenM :: Monoid m => Bool -> m -> m
 89 | whenM b act = if b then act else mempty
 90 | 
 91 | 
 92 | printFields :: Fields -> IO ()
 93 | printFields [] = putStr "\n"
 94 | printFields [!b] = BC.putStr (B.fromShort b) >> putStr "\n"
 95 | printFields (!b:bs) = BC.putStr (B.fromShort b) >> BC.putStr "," >> printFields bs
 96 | 
 97 | printFields2 [] = return ()
 98 | printFields2 (!b:bs) = printFields2 bs
 99 | 
100 | evalPred :: Predicate -> Record -> Bool
101 | evalPred  predicate rec =
102 |   case predicate of
103 |     Eq a b -> evalRef a rec == evalRef b rec
104 |     Ne a b -> evalRef a rec /= evalRef b rec
105 | 
106 | evalRef :: Ref -> Record -> B.ShortByteString
107 | evalRef (Value a) _ = a
108 | evalRef (Field name) r = getField name r
109 | 
110 | getField :: B.ShortByteString -> Record -> B.ShortByteString
111 | getField field (Record fs sch) =
112 |   let i = fromJust (elemIndex field sch)
113 |   in fs !! i
114 | 
115 | getFields :: [B.ShortByteString] -> Record -> [B.ShortByteString]
116 | getFields fs r = map (flip getField r) fs
117 | 
118 | restrict :: Record -> [B.ShortByteString] -> [B.ShortByteString] -> Record
119 | restrict r newSchema parentSchema =
120 |   Record (getFields parentSchema r) newSchema
121 | 
122 | execOp :: Monoid m => Operator -> (Record -> ResIO m) -> ResIO m
123 | execOp op yld =
124 |   case op of
125 |     Scan filename -> processCSV filename yld
126 |     Filter predicate parent -> execOp parent (\rec -> whenM (evalPred predicate rec) (yld rec))
127 |     Project newSchema parentSchema parent -> execOp parent (\rec -> yld (restrict rec newSchema parentSchema ))
128 |     Join left right ->
129 |       execOp left (\rec -> execOp right (\rec' ->
130 |         let keys = schema rec `intersect` schema rec'
131 |         in whenM (getFields keys rec == getFields keys rec')
132 |             (yld (Record (fields rec ++ fields rec')
133 |                        (schema rec ++ schema rec')))))
134 | 
135 | 
136 | runQuery :: Operator -> IO ()
137 | runQuery o = runResourceT $ execOp o (liftIO . printFields . fields)
138 | 
139 | runQueryL :: Operator -> IO [Record]
140 | runQueryL o = runResourceT $ execOp o (return . return)
141 | 
142 | main :: IO ()
143 | main = do
144 |   runResourceT $ processCSV "data/test.csv" (liftIO . print . getField "name")
145 |   runQuery query
146 | 
147 | 
148 | instance Semigroup m => Semigroup (ResourceT IO m) where
149 |   (<>) a1 a2 = do
150 |     (<>) <$> a1 <*> a2
151 | 
152 | instance Monoid m => Monoid (ResourceT IO m) where
153 |   mempty = pure mempty
154 | 
155 | 
156 | 


--------------------------------------------------------------------------------
/src/StreamInterpreter2.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# LANGUAGE ScopedTypeVariables #-}
  3 | {-# LANGUAGE RankNTypes #-}
  4 | {-# LANGUAGE ExistentialQuantification #-}
  5 | {-# LANGUAGE FlexibleInstances #-}
  6 | {-# LANGUAGE BangPatterns #-}
  7 | {-# LANGUAGE DeriveFunctor #-}
  8 | module StreamInterpreter2 where
  9 | 
 10 | -- Like stream interpreter but with a different yield function
 11 | 
 12 | import qualified Data.ByteString.Streaming as B
 13 | import qualified Data.ByteString.Streaming.Char8 as Q
 14 | import qualified Data.ByteString.Char8 as BC
 15 | import Data.ByteString (ByteString)
 16 | import Control.Monad
 17 | import Data.List
 18 | import Data.Maybe
 19 | import Streaming.Internal
 20 | import qualified Streaming.Prelude as S
 21 | import Streaming
 22 | import Control.Monad.Trans.Resource
 23 | import Debug.Trace
 24 | import qualified Data.ByteString.Short as B
 25 |          (ShortByteString, toShort, fromShort)
 26 | 
 27 | data OfBSG m a b = OfBS !a (Q.ByteString m b) deriving (Functor)
 28 | 
 29 | type OfBS a b = OfBSG ResIO a b
 30 | 
 31 | getBS :: OfBS a b -> Q.ByteString ResIO b
 32 | getBS (OfBS a b) = b
 33 | 
 34 | -- How to parse one field, and the result of parsing the rest of the file
 35 | type Fields a = Stream (OfBSG ResIO B.ShortByteString) ResIO a
 36 | type Schema = [B.ShortByteString]
 37 | type Table = FilePath
 38 | 
 39 | data Record a = Record { fields :: !(Fields a), schema :: !Schema }
 40 | 
 41 | data Operator = Scan Table | Project Schema Schema Operator
 42 |               | Filter Predicate Operator | Join Operator Operator
 43 | 
 44 | query, query2 :: Operator
 45 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" ))
 46 | 
 47 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" ))
 48 | 
 49 | queryJoin :: Operator
 50 | queryJoin = Join (Scan "data/test.csv" ) (Scan "data/test1.csv" )
 51 | 
 52 | queryP :: Operator
 53 | queryP = Project ["name"] ["name"] (Scan "data/test.csv" )
 54 | 
 55 | queryF = (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" ))
 56 | 
 57 | data Predicate = Eq Ref Ref | Ne Ref Ref
 58 | 
 59 | data Ref = Field B.ShortByteString | Value B.ShortByteString
 60 | 
 61 | newtype Scanner = Scanner (BS ())
 62 | 
 63 | type BS a = Q.ByteString ResIO a
 64 | 
 65 | newScanner :: FilePath -> Scanner
 66 | newScanner fp = Scanner (Q.readFile fp)
 67 | 
 68 | nextLine :: Scanner -> Q.ByteString ResIO Scanner
 69 | nextLine (Scanner bs) =
 70 |   Scanner <$> Q.span (/= '\n') bs
 71 | 
 72 | hasNext :: Scanner -> ResIO (Of Bool Scanner)
 73 | hasNext (Scanner bs) = fmap Scanner <$> Q.testNull bs
 74 | 
 75 | 
 76 | data L m a = L [m a]
 77 | 
 78 | processCSV :: forall m . Monoid m => FilePath -> (Record (ResIO m) -> ResIO m) -> ResIO m
 79 | processCSV fp yld = do
 80 |   scanner (newScanner fp)
 81 |   where
 82 |     scanner :: Scanner -> ResIO m
 83 |     scanner s = do
 84 |       fs :> rs <- Q.toStrict (nextLine s)
 85 |       let sch = map B.toShort (BC.split ',' fs)
 86 |       let f = rows sch rs
 87 |       destroy f (per_row sch) join (\_ -> pure mempty) -- (per_row sch yld) f
 88 | 
 89 |     per_row :: Schema -> Q.ByteString ResIO (ResIO m) -> ResIO m
 90 |     per_row fs s =
 91 |       let vs = Q.split ',' s
 92 |       in yld $ Record (zipWithStream (OfBS) fs vs) fs
 93 | 
 94 | 
 95 |     rows :: Schema -> Scanner -> Stream (Q.ByteString ResIO) ResIO ()
 96 |     rows fs (Scanner s) = Q.lines s
 97 | 
 98 | zipWithStream
 99 |   :: (Monad m, Functor n)
100 |   =>  (forall x . a -> Q.ByteString m x -> n x)
101 |   -> [a]
102 |   -> Stream (Q.ByteString m) m r
103 |   -> Stream n m r
104 | zipWithStream op zs = loop zs
105 |   where
106 |     loop [] !ls      = loop zs ls
107 |     loop a@(x:xs)  ls = case ls of
108 |       Return r -> Return r
109 |       Step fls -> Step $ fmap (loop xs) (op x fls)
110 |       Effect mls -> Effect $ liftM (loop a) mls
111 | 
112 | {-#INLINABLE zipWithStream #-}
113 | 
114 | whenM :: (Monad n, Monoid m) => n Bool -> n m -> n m
115 | whenM ba act = ba >>= \b -> if b then act else return mempty
116 | 
117 | 
118 | printFields :: Fields (ResIO r) -> ResIO r
119 | printFields s = join (Q.putStrLn (Q.intercalate "," (maps getBS s)))
120 | {-
121 | printFields [] = putStr "\n"
122 | printFields [!b] = BC.putStr (B.fromShort b) >> putStr "\n"
123 | printFields (!b:bs) = BC.putStr (B.fromShort b) >> BC.putStr "," >> printFields bs
124 | -}
125 | 
126 | printFields2 [] = return ()
127 | printFields2 (!b:bs) = printFields2 bs
128 | 
129 | evalPred :: Predicate -> Record (ResIO a)
130 |          -> (Record (ResIO m) -> ResIO m)
131 |          -> ResIO m
132 | evalPred  predicate (Record d_fs sch) k = do
133 |   let r = toListBS $ d_fs
134 | 
135 |   return _
136 | --  k (Record s sch)
137 | 
138 |   where
139 |     go (fs :> s) =  _
140 |     pur_go fs =
141 |       case predicate of
142 |         Eq a b -> evalRef a fs sch == evalRef b fs sch
143 |         Ne a b -> evalRef a fs sch /= evalRef b fs sch
144 | 
145 | evalRef :: Ref -> [B.ShortByteString] -> [B.ShortByteString] -> B.ShortByteString
146 | evalRef (Value a) _ _ = a
147 | evalRef (Field name) fs sch = getField name fs sch
148 | 
149 | -- Copy and read all the fields in a stream
150 | toListBS' :: forall a b . (Stream (OfBSG ResIO a) ResIO b)
151 |          -> ResIO (Of [B.ShortByteString] (Stream (OfBSG ResIO a) ResIO b))
152 | toListBS' s = destroy s _ _ _
153 |   where
154 |     go :: OfBSG
155 |                ResIO
156 |                a
157 |                (ResIO (Of [B.ShortByteString] (Stream (OfBSG ResIO a) ResIO b)))
158 |              -> ResIO (Of [B.ShortByteString] (Stream (OfBSG ResIO a) ResIO b))
159 |     go (OfBS a b) = do
160 |       let v = Q.toStrict (Q.copy b)
161 | --      bs :> k2 <- k
162 |       return _ --((B.toShort b : bs) :> k2)
163 | 
164 | 
165 | -- Copy and read all the fields in a stream
166 | toListBS :: forall a b . (Stream (OfBSG ResIO a) ResIO b)
167 |          -> Stream (OfBSG ResIO a) ResIO (Of [B.ShortByteString] b)
168 | toListBS s = destroy s go effect (\b -> return ([] :> b))
169 |   where
170 |     go :: OfBSG
171 |                ResIO a (Stream (OfBSG ResIO a) ResIO (Of [B.ShortByteString] b))
172 |              -> Stream (OfBSG ResIO a) ResIO (Of [B.ShortByteString] b)
173 | 
174 |     go (OfBS a b) = do
175 |       let v = Q.toStrict (Q.copy b)
176 |           g (bs :> s) = fmap (\(bss :> b) -> (B.toShort bs: bss) :> b) s
177 |       wrap (OfBS a (fmap g v))
178 | --      ls :> b' <- fs
179 | --      return ((B.toShort l : ls) :> b')
180 | 
181 | getField :: B.ShortByteString -> [B.ShortByteString]
182 |          -> [B.ShortByteString] -> B.ShortByteString
183 | getField field fs sch =
184 |   let i = fromJust (elemIndex field sch)
185 |   in fs !! i
186 | 
187 | getFields :: [B.ShortByteString] -> Record a -> ResIO [B.ShortByteString]
188 | getFields fs (Record d_fs sch) = do
189 |   fs <- toListBS d_fs
190 |   return $ map (\f -> getField f fs sch) fs
191 | 
192 | restrict :: Record a -> [B.ShortByteString] -> [B.ShortByteString] -> Record a
193 | restrict r newSchema parentSchema =
194 |   Record (filterQ  (`elem` parentSchema) (fields r)) newSchema
195 | 
196 | -- | Skip elements of a stream that fail a predicate
197 | filterQ  :: forall a r m . Monad m => (a -> Bool) -> Stream (OfBSG m a) m r -> Stream (OfBSG m a) m r
198 | filterQ thePred = loop where
199 |   loop :: Monad m => Stream (OfBSG m a) m r -> Stream (OfBSG m a) m r
200 |   loop str = case str of
201 |     Return r       -> Return r
202 |     Effect m       -> Effect (fmap loop m)
203 |     Step (a `OfBS` as) -> if thePred a
204 |       then Step (a `OfBS` (fmap loop as))
205 |       else Step (a `OfBS` lift (Q.effects (fmap loop as)))
206 | 
207 | 
208 | execOp :: Monoid m => Operator -> (Record (ResIO m) -> ResIO m) -> ResIO m
209 | execOp op yld =
210 |   case op of
211 |     Scan filename -> processCSV filename yld
212 |     Filter predicate parent -> execOp parent (\rec -> (evalPred predicate rec) yld)
213 |     Project newSchema parentSchema parent -> execOp parent (\rec -> yld (restrict rec newSchema parentSchema ))
214 |     Join left right ->
215 |       execOp left (\rec -> execOp right (\rec' ->
216 |         let keys = schema rec `intersect` schema rec'
217 |         in whenM ((==) <$> (getFields keys rec) <*> (getFields keys rec'))
218 |             (yld (Record (fields rec <> fields rec')
219 |                        (schema rec ++ schema rec')))))
220 | 
221 | 
222 | runQuery :: Operator -> IO ()
223 | runQuery o = runResourceT $ execOp o (printFields . fields)
224 | 
225 | {-
226 | runQueryL :: Operator -> IO [Record ()]
227 | runQueryL o = runResourceT $ execOp o _
228 | -}
229 | 
230 | main :: IO ()
231 | main = do
232 | --  runResourceT $ processCSV "data/test.csv" (liftIO . print . getField "name")
233 |   runQuery query
234 | 
235 | 
236 | instance Semigroup m => Semigroup (ResourceT IO m) where
237 |   (<>) a1 a2 = do
238 |     (<>) <$> a1 <*> a2
239 | 
240 | instance Monoid m => Monoid (ResourceT IO m) where
241 |   mempty = pure mempty
242 | 
243 | 
244 | 


--------------------------------------------------------------------------------
/src/StreamLMS.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# LANGUAGE TemplateHaskell #-}
  3 | {-# LANGUAGE NoImplicitPrelude #-}
  4 | {-# LANGUAGE ScopedTypeVariables #-}
  5 | {-# LANGUAGE FlexibleInstances #-}
  6 | {-# LANGUAGE Strict #-}
  7 | {-# OPTIONS_GHC -Wno-orphans #-}
  8 | module StreamLMS where
  9 | 
 10 | import qualified Data.ByteString as B
 11 | import qualified Data.ByteString.Char8 as BC
 12 | import Data.ByteString (ByteString)
 13 | import Control.Monad
 14 | import Data.List
 15 | import Data.Maybe
 16 | import Language.Haskell.TH
 17 | import Language.Haskell.TH.Syntax
 18 | import Prelude hiding (Applicative(..))
 19 | import Instances.TH.Lift ()
 20 | --import Data.FileEmbed
 21 | import Data.Functor.Identity
 22 | import Control.Applicative (liftA2)
 23 | import qualified Prelude as P
 24 | import qualified Data.ByteString.Streaming.Char8 as Q
 25 | import Control.Monad.Trans.Resource
 26 | import System.IO
 27 | import Debug.Trace
 28 | import Data.Functor.Of
 29 | 
 30 | class Ops r where
 31 |   eq :: Eq a => r a -> r a -> r Bool
 32 |   leq :: Eq a => r (IO a) -> r (IO a) -> r (IO Bool)
 33 |   lneq :: Eq a => r (IO a) -> r (IO a) -> r (IO Bool)
 34 |   neq :: Eq a => r a -> r a -> r Bool
 35 |   bc_split :: r Char -> r ByteString -> r [ByteString]
 36 |   tail_dropwhile :: Char -> r (BS ()) -> r (BS ())
 37 |   take_while :: Char -> r (BS ()) -> r (BS ())
 38 | 
 39 |   _case_of :: r (Of a b) -> r (a -> b -> c) -> r c
 40 | 
 41 |   _if :: r Bool -> r a -> r a -> r a
 42 |   _fix :: (r a -> r a) -> r a
 43 |   _lam :: (r a -> r b) -> r (a -> b)
 44 | 
 45 |   _bind :: Monad m => r (m a) -> (r (a -> m b)) -> r (m b)
 46 |   _print :: Show a => r a -> r Res
 47 |   _putStr :: r (BS ())  -> r (IO ())
 48 |   (>>>) :: Monoid m => r m -> r m -> r m
 49 |   _empty :: Monoid m => r m
 50 |   _pure :: P.Applicative f => r a -> r (f a)
 51 | 
 52 |   -- Scanner interface
 53 |   _newScanner :: FilePath -> r (IO Scanner)
 54 |   _hasNext :: r Scanner -> r (IO (Of Bool Scanner))
 55 |   _nextLine :: r Scanner -> (r (BS ()), r Scanner)
 56 | 
 57 |   liftBS :: r ByteString -> r (BS ())
 58 |   runBS :: r (BS ()) -> r (IO ByteString)
 59 | 
 60 |   pure :: Lift a => a -> r a
 61 |   (<*>) :: r (a -> b) -> r a -> r b
 62 | 
 63 | infixl 4 <*>
 64 | 
 65 | newtype Code a = Code (Q (TExp a))
 66 | 
 67 | instance Ops Code where
 68 |   eq (Code e1) (Code e2) = Code [|| $$e1 == $$e2 ||]
 69 |   neq (Code e1) (Code e2) = Code [|| $$e1 /= $$e2 ||]
 70 |   leq (Code e1) (Code e2) = Code [|| (==) <$> $$e1 P.<*> $$e2 ||]
 71 |   lneq (Code e1) (Code e2) = Code [|| (/=) <$> $$e1 P.<*> $$e2 ||]
 72 |   bc_split (Code e1) (Code e2) = Code [|| BC.split $$e1 $$e2 ||]
 73 |   _if (Code a) (Code b) (Code c) = Code [|| if $$a then $$b else $$c ||]
 74 |   (>>>) (Code a) (Code b) = Code [|| $$a <> $$b ||]
 75 |   _bind (Code a) (Code b) = Code [|| $$a >>= $$b ||]
 76 |   _empty = Code [|| mempty ||]
 77 |   _pure (Code p) = Code [|| P.pure $$p ||]
 78 | 
 79 |   _print (Code a) = Code [|| print $$a ||]
 80 |   _putStr (Code a) = Code [|| Q.putStr $$a ||]
 81 | 
 82 |   _case_of (Code a) (Code b) = Code [|| case $$a of
 83 |                                                   (v1 :> v2) -> $$b v1 v2 ||]
 84 | 
 85 |   tail_dropwhile c (Code b) = Code [|| Q.drop 1 (Q.dropWhile (/= c) $$b) ||]
 86 |   take_while c (Code b) = Code [|| Q.takeWhile (/= c) $$b ||]
 87 |   _fix f = Code [|| fix (\a -> $$(runCode $ f (Code [||a||]))) ||]
 88 | 
 89 |   _lam f = Code $ [|| \a ->  $$(runCode $ f (Code [|| a ||]))  ||]
 90 | 
 91 |   liftBS (Code b) = Code $ [|| Q.fromStrict $$(b) ||]
 92 |   runBS (Code b)  = Code $ [|| Q.toStrict_ $$b ||]
 93 | 
 94 | 
 95 |   _newScanner fp = Code [|| newScanner fp ||]
 96 |   _hasNext s = Code [|| hasNext $$(runCode s) ||]
 97 |   _nextLine = _nextLineCode
 98 | 
 99 | 
100 | 
101 |   pure = Code . unsafeTExpCoerce . lift
102 |   (Code f) <*> (Code a) = Code [|| $$f $$a ||]
103 | 
104 | 
105 | instance Ops Identity where
106 |   eq = liftA2 (==)
107 |   leq = liftA2 (liftA2 (==))
108 |   lneq = liftA2 (liftA2 (==))
109 | 
110 |   neq = liftA2 (/=)
111 |   bc_split = liftA2 (BC.split)
112 |   tail_dropwhile c = fmap (Q.drop 1 . (Q.dropWhile (/= c)))
113 |   take_while c = fmap (Q.takeWhile (/= c))
114 |   _print = fmap print
115 |   _putStr = fmap (Q.putStr)
116 |   _empty = Identity mempty
117 |   _if (Identity b) (Identity c1) (Identity c2) = Identity (if b then c1 else c2)
118 | 
119 |   _fix = fix
120 |   _lam f = Identity (\a -> runIdentity (f (Identity a)))
121 |   (>>>) = liftA2 mappend
122 |   _bind = liftA2 (>>=)
123 |   _pure = fmap (P.pure)
124 |   _newScanner fp = Identity (newScanner fp)
125 |   _hasNext = fmap hasNext
126 |   _nextLine = nextLine
127 | 
128 |   liftBS b = Q.fromStrict <$> b
129 |   runBS b  = Q.toStrict_ <$> b
130 | 
131 |   pure = Identity
132 |   (<*>) (Identity a1) (Identity a2) = Identity (a1 a2)
133 | 
134 | 
135 | list_eq :: (Ops r, Eq a) => [r (IO a)] -> [r (IO a)] -> r (IO Bool)
136 | list_eq [] [] = _pure (pure True)
137 | list_eq (v:vs) (v1:v1s) =
138 |   (leq v v1) `_bind` (_lam $ \b ->
139 |     _if b  (list_eq vs v1s) (_pure (pure False)))
140 | list_eq _ _ = _pure (pure False)
141 | 
142 | _when :: (Monoid m, Ops r) => r (IO Bool) -> r (IO m) -> r (IO m)
143 | _when cond act = cond `_bind` (_lam $ \b -> _if b act _empty)
144 | 
145 | _whenOf :: (Monoid m, Ops r) => r (IO (Of Bool t)) -> r (t -> IO m) -> r (IO m)
146 | _whenOf cond act = cond `_bind` (_lam $ \b
147 |                       -> _case_of b (_lam $ \b' -> _lam $ \t
148 |                                         -> _if b' (act <*> t) _empty))
149 | 
150 | _whenA :: (P.Applicative f, Ops r) => r Bool -> r (f ()) -> r (f ())
151 | _whenA cond act = _if cond act (_pure (pure ()))
152 | 
153 | runCode :: Code a -> Q (TExp a)
154 | runCode (Code a) = a
155 | 
156 | 
157 | 
158 | type Fields r = [r (BS ())]
159 | type Schema = [ByteString]
160 | type Table = FilePath
161 | 
162 | type Res = IO ()
163 | 
164 | data Record r = Record { fields :: Fields r, schema :: Schema }
165 | 
166 | getField :: ByteString -> Record r -> r (BS ())
167 | getField field (Record fs sch) =
168 |   let i = fromJust (elemIndex field sch)
169 |   in  (fs !! i)
170 | 
171 | getFields :: [ByteString] -> Record r -> [r (BS ())]
172 | getFields fs r = map (flip getField r) fs
173 | 
174 | 
175 | data Operator = Scan FilePath Schema | Project Schema Schema Operator
176 |               | Filter Predicate Operator | Join Operator Operator deriving Show
177 | 
178 | query, query2 :: Operator
179 | query = Project ["age"] ["age"] (Filter (Eq (Value "john") (Field "name")) (Scan "data/test.csv" ["name", "age"]))
180 | 
181 | query2 = Project ["name"] ["name"] (Filter (Eq (Value "34") (Field "age")) (Scan "data/test.csv" ["name", "age"]))
182 | 
183 | queryJoin :: Operator
184 | queryJoin = Join (Scan "data/test.csv" ["name", "age"]) (Scan "data/test1.csv" ["name", "weight"])
185 | 
186 | queryP :: Operator
187 | queryP = Project ["name"] ["name"] (Scan "data/test.csv" ["name", "age"])
188 | 
189 | data Predicate = Eq Ref Ref | Ne Ref Ref deriving Show
190 | 
191 | data Ref = Field ByteString | Value ByteString deriving Show
192 | 
193 | type QTExp a = Code a
194 | 
195 | fix :: (a -> a) -> a
196 | fix f = let x = f x in x
197 | 
198 | data Scanner = Scanner (BS ())
199 | 
200 | type BS a = Q.ByteString IO a
201 | 
202 | newScanner :: FilePath -> IO Scanner
203 | newScanner fp =
204 |   openBinaryFile fp ReadMode >>= \h -> return (Scanner (Q.fromHandle h))
205 | 
206 | nextLine :: Identity Scanner -> (Identity (BS ()), Identity Scanner)
207 | nextLine (Identity (Scanner bs)) =
208 |   (Identity $ Q.takeWhile (/= '\n') bs,
209 |    Identity $ Scanner (Q.drop 1 (Q.dropWhile (/= '\n') bs)))
210 | 
211 | -- As |span| is not stage aware, it is more dynamic an necessary. Splitting
212 | -- the implementation up means that we can skip over entire rows if
213 | -- necessary in the generated code.
214 | _nextLineCode :: Code Scanner -> (Code (BS ()), Code Scanner)
215 | _nextLineCode scanner =
216 |   let fs = Code [|| let (Scanner s) = $$(runCode scanner) in Q.takeWhile (/= '\n') s ||]
217 |       ts = Code [|| let (Scanner s) = $$(runCode scanner) in Scanner (Q.drop 1 (Q.dropWhile (/= '\n') s)) ||]
218 |   in (fs, ts)
219 | 
220 | 
221 | 
222 | hasNext :: Scanner -> IO (Of Bool Scanner)
223 | hasNext (Scanner bs) = (\(b :> s) -> (not b :> Scanner s)) <$> Q.testNull bs
224 | 
225 | 
226 | while ::
227 |   (Ops r, Monoid m) =>
228 |   r (t -> IO (Of Bool t)) -> r ((t -> IO m) -> t -> IO m) -> r (t -> IO m)
229 | while k b = _fix (\r -> _lam $ \rs -> _whenOf (k <*> rs) (_lam $ \rs -> b <*> r <*> rs))
230 | 
231 | whenM :: Monoid m => Bool -> m -> m
232 | whenM b act = if b then act else mempty
233 | 
234 | processCSV :: forall m r . (Monoid m, Ops r) => Schema -> FilePath -> (Record r -> r (IO m)) -> r (IO m)
235 | processCSV ss f yld =
236 |   _newScanner f `_bind` rows ss
237 |   where
238 |     rows :: Schema -> r (Scanner -> (IO m))
239 |     rows sch = do
240 |       while (_lam _hasNext)
241 |             (_lam $ \r -> _lam $ \rs ->
242 |               (let (hs, ts) = _nextLine rs
243 |               in yld (Record (parseRow sch hs) sch) >>> (r <*> ts))
244 |               )
245 | 
246 |     -- We can't use the standard |BC.split| function here because
247 |     -- we we statically know how far we will unroll. The result is then
248 |     -- more static as we can do things like drop certain fields if we
249 |     -- perform a projection.
250 |     parseRow :: Schema -> r (BS ()) -> [r (BS ())]
251 |     parseRow [] _ = []
252 |     parseRow [_] b =
253 |       [take_while '\n' b]
254 |     parseRow (_:ss') b =
255 |       let new = tail_dropwhile ',' b
256 |           rs = parseRow ss' new
257 |       in (take_while ',' b  : rs)
258 | 
259 | printFields :: Ops r => Fields r -> r (IO ())
260 | printFields [] = _empty
261 | printFields [x] = _putStr x >>> _putStr (liftBS (pure "\n"))
262 | printFields (x:xs) =
263 |   _putStr x >>> _putStr (liftBS $ pure ",") >>> printFields xs
264 | 
265 | evalPred :: Ops r => Predicate -> Record r -> r (IO Bool)
266 | evalPred  predicate rec =
267 |   case predicate of
268 |     Eq a b -> leq (evalRef a rec) (evalRef b rec)
269 |     Ne a b -> lneq (evalRef a rec) (evalRef b rec)
270 | 
271 | evalRef :: Ops r => Ref -> Record r -> r (IO ByteString)
272 | evalRef (Value a) _ = _pure (pure a)
273 | evalRef (Field name) r = runBS $ getField name r
274 | 
275 | 
276 | restrict :: Ops r => Record r -> Schema -> Schema -> Record r
277 | restrict r newSchema parentSchema =
278 |   Record (map (flip getField r) parentSchema) newSchema
279 | 
280 | 
281 | execOp :: (Monoid m, Ops r) => Operator -> (Record r -> r (IO m)) -> r (IO m)
282 | execOp op yld =
283 |   case op of
284 |     Scan file sch ->
285 |       processCSV sch file yld
286 |     Filter predicate parent -> execOp parent
287 |       (\rec -> _when (evalPred predicate rec) (yld rec) )
288 |     Project newSchema parentSchema parent ->
289 |       execOp parent (\rec -> yld (restrict rec newSchema parentSchema ))
290 |     Join left right ->
291 |       execOp left (\rec -> execOp right (\rec' ->
292 |         let keys = schema rec `intersect` schema rec'
293 |         in _when (list_eq (map runBS $ getFields keys rec) (map runBS $ getFields keys rec'))
294 |                (yld (Record (fields rec ++ fields rec')
295 |                        (schema rec ++ schema rec')))))
296 | 
297 | runQuery :: Operator -> Q (TExp Res)
298 | runQuery q = [|| $$(runCode $ execOp q (printFields . fields)) ||]
299 | 
300 | runQueryL :: Operator -> Q (TExp (IO [Record Identity]))
301 | runQueryL q = [|| $$(runCode $ execOp q (\r -> Code [|| return (return $$(runCode $ spill r)) ||])) ||]
302 | 
303 | spill :: Record Code -> Code (Record Identity)
304 | spill (Record rs ss) = Code [|| Record $$(runCode $ spill2 rs) ss ||]
305 | 
306 | spill2 :: [Code a] -> Code [Identity a]
307 | spill2 [] = Code [|| [] ||]
308 | spill2 (x:xs) = Code [|| (Identity $$(runCode x)) : $$(runCode $ spill2 xs) ||]
309 | 
310 | runQueryUnstaged :: Operator -> Res
311 | runQueryUnstaged q =  runIdentity (execOp q (printFields . fields))
312 | 
313 | runQueryUnstagedL :: Operator -> IO [Record Identity]
314 | runQueryUnstagedL q =  runIdentity (execOp q (return . return . return))
315 | 
316 | test :: IO ()
317 | test = do
318 | --  processCSV "data/test.csv" (print . getField "name")
319 |   expr <- runQ $ unTypeQ  $ runCode $ execOp query (printFields . fields)
320 | --  expr <- runQ $ unTypeQ  $ power
321 |   putStrLn $ pprint expr
322 | 
323 | 
324 | bsToExp :: B.ByteString -> Q Exp
325 | bsToExp bs = do
326 |     helper <- [| stringToBs |]
327 |     let chars = BC.unpack . BC.tail . (BC.dropWhile (/= '\n')) $ bs
328 |     return $! AppE helper $! LitE $! StringL chars
329 | 
330 | stringToBs :: String -> B.ByteString
331 | stringToBs = BC.pack
332 | 
333 | 


--------------------------------------------------------------------------------
/src/Test.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TemplateHaskell #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | {-# OPTIONS_GHC -ddump-splices  #-}
  4 | module Test where
  5 | 
  6 | --import qualified Compiler as C
  7 | import qualified LMS as L
  8 | --import qualified Interpreter as I
  9 | --import qualified SimpleInterpreter as SI
 10 | import qualified StreamLMS as S
 11 | --import qualified StreamInterpreter as StI
 12 | --import qualified StreamInterpreter2 as StI2
 13 | import Weigh
 14 | import GHC.Stats
 15 | import System.Mem
 16 | import System.IO
 17 | import Data.ByteString (ByteString)
 18 | import Control.Monad.Trans.Resource
 19 | 
 20 | import qualified Data.ByteString.Streaming.Char8 as Q
 21 | 
 22 | import Criterion
 23 | import Criterion.Main
 24 | import System.IO.Silently
 25 | 
 26 | 
 27 | --csvQuery = L.Filter (L.Eq (L.Value "cricket") (L.Field "word")) csvTable
 28 | 
 29 | --csvTable = L.Scan ["word", "year", "n1" "n2"] "1gram.csv"
 30 | 
 31 | 
 32 | for = flip map
 33 | 
 34 | main :: IO ()
 35 | main = do
 36 |   defaultMain $
 37 |     $$(L.genBench)
 38 | 
 39 |   {-
 40 |   putStrLn "Simple Interpreter"
 41 |   SI.runQuery SI.query
 42 |   SI.runQuery SI.queryJoin
 43 | 
 44 | 
 45 |   putStrLn "Interpreter"
 46 |   I.runQuery I.query
 47 |   I.runQuery I.queryJoin
 48 | 
 49 |   putStrLn "Compiler"
 50 |   $$(C.runQuery C.queryProj)
 51 |   $$(C.runQueryL C.queryProj)
 52 |   $$(C.runQuery C.query)
 53 |   $$(C.runQuery C.query2)
 54 |   $$(C.runQuery C.queryJoin)
 55 | -}
 56 |   putStrLn "LMS Compiler"
 57 |   {-
 58 |   mainWith ( do
 59 |       action "q1" $$(S.runQuery S.query)
 60 |       action "q2" $$(S.runQuery S.query2)
 61 |       action "q3" $$(S.runQuery S.queryJoin)
 62 |       action "q4" $$(S.runQuery S.queryP)
 63 |       )
 64 |       -}
 65 | {-
 66 |   defaultMain $ [bgroup "StI" [
 67 |     bench "q" $ whnfIO (silence $ StI.runQuery StI.query)
 68 |     , bench "q2" $ whnfIO (silence $ StI.runQuery StI.query2)
 69 |     , bench "qj" $ whnfIO (silence $ StI.runQuery StI.queryJoin)
 70 |     , bench "q" $ whnfIO (silence $ StI.runQuery StI.queryP)
 71 |     ]]
 72 |     ++ [ bgroup "StI2" [
 73 |     bench "q" $ whnfIO (silence $ StI.runQuery StI.query)
 74 |     , bench "q2" $ whnfIO (silence $ StI.runQuery StI.query2)
 75 |     , bench "qj" $ whnfIO (silence $ StI.runQuery StI.queryJoin)
 76 |     , bench "q" $ whnfIO (silence $ StI.runQuery StI.queryP)
 77 |     ]]
 78 |     ++  -}
 79 |   {-
 80 |   defaultMain $
 81 |     [bgroup "LMS-C" [
 82 |     bench "q" $ whnfIO (silence $$(S.runQuery S.query))
 83 |     , bench "q2" $ whnfIO (silence $$(S.runQuery S.query2))
 84 |     , bench "qj" $ whnfIO (silence $$(S.runQuery S.queryJoin))
 85 |     , bench "qp" $ whnfIO (silence $$(S.runQuery S.queryP)) ]]
 86 |     -}
 87 | 
 88 | {-
 89 |   $$(L.runQuery L.query)
 90 |   $$(L.runQuery L.query2)
 91 |   $$(L.runQuery L.queryJoin)
 92 |   $$(L.runQuery L.queryP)
 93 |   performGC
 94 |   s2 <- getRTSStats
 95 | 
 96 |   performGC
 97 |   s1 <- getRTSStats
 98 |   -}
 99 | 
100 | 
101 |   -- Observe that the number of max_live_bytes is much lower for the
102 |   -- streaming version. However, the number of allocated bytes is the same
103 |   -- for each example as we must read every character eventually.
104 |   --print (max_live_bytes s1)
105 |   --print (max_live_bytes s2)
106 | 
107 | 
108 | 
109 | 
110 | --
111 |   putStrLn "LMS Compiler Stream"
112 | {-
113 |   putStrLn "LMS Interpreter"
114 |   L.runQueryUnstaged L.query
115 |   L.runQueryUnstaged L.query2
116 |   L.runQueryUnstaged L.queryJoin
117 |   performGC
118 |   s3 <- getRTSStats
119 |   print (max_live_bytes s3)
120 |   -}
121 | 


--------------------------------------------------------------------------------