├── README
├── Setup.hs
├── refs
│   ├── 1.png
│   ├── 2.png
│   ├── 3.png
│   ├── report.pdf
│   ├── Makefile
│   ├── slidy
│   │   └── slidy.css
│   ├── disk_based_transactions.md
│   └── report.tex
├── src
│   ├── Makefile
│   ├── System
│   │   └── HaskDB
│   │       ├── Fsync.hsc
│   │       ├── Fsync.hs
│   │       ├── Makefile
│   │       ├── algorithm.md
│   │       ├── FileHeader.lhs
│   │       ├── lock-based-implementation.md
│   │       ├── Introduction.md
│   │       ├── Backend_test.hs
│   │       ├── Backend.hs
│   │       ├── FileHandling.lhs
│   │       ├── TransactionFH.lhs
│   │       ├── Journal.lhs
│   │       └── Transactions.lhs
│   ├── abstract.md
│   └── tTest.hs
├── LICENSE
└── HaskDB.cabal
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 |
--------------------------------------------------------------------------------
/refs/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/course/HaskDB/master/refs/1.png
--------------------------------------------------------------------------------
/refs/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/course/HaskDB/master/refs/2.png
--------------------------------------------------------------------------------
/refs/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/course/HaskDB/master/refs/3.png
--------------------------------------------------------------------------------
/refs/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/course/HaskDB/master/refs/report.pdf
--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
1 | clean :
2 | 	rm -f *.header
3 | 	rm -f *.trans
4 | 	rm -f *.journal
5 |
--------------------------------------------------------------------------------
/refs/Makefile:
--------------------------------------------------------------------------------
1 | all: disk_based_transactions.md
2 | 	pandoc --data-dir=datadir --self-contained -t slidy -s disk_based_transactions.md -o disk_based_transactions-slidy.html
3 | 	pandoc --data-dir=datadir --self-contained -t slidy -s poster.md -o poster.html
4 | 	pandoc -t s5 -s disk_based_transactions.md -o disk_based_transactions-s5.html
5 | 	pandoc -t dzslides -s disk_based_transactions.md -o disk_based_transactions-dzslides.html
6 | 	# pandoc -t beamer -s disk_based_transactions.md -o disk_based_transactions.pdf
7 |
8 |
9 | clean:
10 | 	rm -rf *.html *.pdf
11 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Fsync.hsc:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ForeignFunctionInterface #-}
2 | module System.HaskDB.Fsync (sync) where
3 |
4 | import Foreign.C.Error (throwErrnoIfMinus1_)
5 | import Foreign.C.Types (CInt)
6 | import System.Posix.Types (Fd(..))
7 | import System.Posix.IO (handleToFd)
8 | import System.IO
9 |
10 | #include <unistd.h>
11 |
12 | foreign import ccall "fsync"
13 |     c_fsync :: CInt -> IO CInt
14 |
15 | fsync :: Fd -> IO ()
16 | fsync (Fd fd) = throwErrnoIfMinus1_ "fsync" $ c_fsync fd
17 |
18 | sync :: Handle -> IO ()
19 | sync fh = do
20 |     fd <- handleToFd fh
21 |     fsync fd
22 |
23 |
24 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Fsync.hs:
--------------------------------------------------------------------------------
1 | module System.HaskDB.Fsync (sync) where
2 | import Foreign.C.Error
3 | import Foreign.C.Types
4 | import qualified System.IO as IO
5 | import qualified System.IO.Error as IO
6 | import GHC.IO.FD (FD(..))
7 | import GHC.IO.Handle.Types (Handle__(..))
8 | import GHC.IO.Handle.Internals (wantWritableHandle)
9 | import Data.Typeable
10 | foreign import ccall "unistd.h fsync" c_fsync :: CInt -> IO CInt
11 | sync :: IO.Handle -> IO ()
12 | sync h = do
13 |     IO.hFlush h
14 |     wantWritableHandle "hSync" h $ fsyncH
15 |     where
16 |         fsyncH Handle__ {haDevice = dev} = maybe (return ()) fsyncD $ cast dev
17 |         fsyncD FD {fdFD = fd} = throwErrnoPathIfMinus1_ "fsync" (show h)
18 |             (c_fsync fd)
19 |
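-- A hypothetical usage sketch (not part of the original module): hFlush
-- pushes Haskell's buffered writes to the OS, and sync/fsync then forces
-- the OS page cache onto the device, so the data survives a crash once
-- sync returns.
--
--     import qualified Data.ByteString.Char8 as BSC
--     main = IO.withBinaryFile "data.bin" IO.WriteMode $ \h -> do
--         BSC.hPut h (BSC.pack "payload")
--         sync h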
--------------------------------------------------------------------------------
/src/abstract.md:
--------------------------------------------------------------------------------
1 | ## Abstract
2 |
3 | Transactions form an integral part of any multi-user concurrent database system.
4 | Most databases implement transactions through various locking mechanisms. We present
5 | here a lock-free, composable file-transaction system implemented in a high-level
6 | functional programming language, Haskell. The use of Haskell allows us to easily
7 | compose transactions and run them as a single atomic transaction, thus providing
8 | a powerful abstraction for the programmer's use. The implementation is not
9 | only highly concurrent but also non-blocking, as there are no locks on files.
10 | We have exploited the space-efficient probabilistic data structure
11 | called the Bloom filter to keep primary-memory requirements as low as possible.
12 |
13 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Makefile:
--------------------------------------------------------------------------------
1 | markdown : *.lhs
2 | 	pandoc -f markdown+lhs Introduction.md -o 01.md
3 | 	pandoc -f markdown+lhs lock-based-implementation.md -o 02.md
4 | 	pandoc -f markdown+lhs algorithm.md -o 03.md
5 | 	pandoc -f markdown+lhs Transactions.lhs -o 04.md
6 | 	pandoc -f markdown+lhs Journal.lhs -o 05.md
7 | 	pandoc -f markdown+lhs TransactionFH.lhs -o 06.md
8 | 	pandoc -f markdown+lhs FileHandling.lhs -o 07.md
9 | 	pandoc -f markdown+lhs FileHeader.lhs -o 08.md
10 |
11 | 	rm -f report.md
12 | 	touch report.md
13 | 	cat 01.md >> report.md
14 | 	cat 02.md >> report.md
15 | 	cat 03.md >> report.md
16 | 	cat 04.md >> report.md
17 | 	cat 05.md >> report.md
18 | 	cat 06.md >> report.md
19 | 	cat 07.md >> report.md
20 | 	cat 08.md >> report.md
21 |
22 | 	pandoc -f markdown+lhs report.md -o report.pdf
23 | clean :
24 | 	rm -f *.aux
25 | 	rm -f *.log
26 | 	rm -f *.ptb
27 | 	rm -f *.pdf
28 |
29 | cleanmd :
30 | 	rm -f *.md
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c)2012, Satvik Chauhan , Pankaj More
2 |
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 |     * Redistributions of source code must retain the above copyright
9 |       notice, this list of conditions and the following disclaimer.
10 |
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 |
16 |     * Neither the name of Satvik Chauhan , Pankaj More nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/src/tTest.hs:
--------------------------------------------------------------------------------
1 | import System.HaskDB.Transactions
2 | import System.HaskDB.TransactionFH
3 | import System.HaskDB.FileHeader
4 | import System.HaskDB.FileHandling
5 | import qualified System.HaskDB.FileHandling as FH
6 | import qualified Data.ByteString as BS
7 | import qualified Data.ByteString.Char8 as BSC
8 | import Control.Concurrent
9 | import Control.Applicative
10 |
11 | rand size = BS.take size $ BSC.pack $ take size $ cycle "This is a fttest data"
12 | rand2 size = BS.take size $ BSC.pack $ take size $ cycle "Hello"
13 |
14 | fttest = do
15 |     writeBlockT 0 $ rand 4096
16 |     writeBlockT 1 $ rand 4096
17 |     writeBlockT 2 $ rand2 4096
18 |     writeBlockT 3 $ rand 4096
19 |     writeBlockT 4 $ rand2 4096
20 |     writeBlockT 0 $ rand2 4096
21 |     readBlockT 0
22 |
23 | fttest2 = do
24 |     readBlockT 0
25 |
26 | fttest3 = do
27 |     writeBlockT 0 $ rand2 4096
28 |
29 | main = do
30 |     newDB <- openTF "fttest.dat"
31 |     forkIO $ runTransaction fttest newDB >>= print
32 |     forkIO $ retryTransaction fttest newDB >>= print
33 |     forkIO $ retryTransaction fttest newDB >>= print
34 |     forkIO $ retryTransaction fttest newDB >>= print
35 |     forkIO $ retryTransaction fttest newDB >>= print
36 |     forkIO $ retryTransaction fttest newDB >>= print
37 |     forkIO $ retryTransaction fttest newDB >>= print
38 |     forkIO $ retryTransaction fttest newDB >>= print
39 |     forkIO $ retryTransaction fttest newDB >>= print
40 |     forkIO $ retryTransaction fttest newDB >>= print
41 |     forkIO $ retryTransaction fttest newDB >>= print
42 |     t <- getFileVersion (fHandle newDB)
43 |     print t
44 |     {-sequencer newDB-}
45 |
46 |
--------------------------------------------------------------------------------
/src/System/HaskDB/algorithm.md:
--------------------------------------------------------------------------------
1 | # Algorithm
2 |
3 | Any transaction has access to three operations:
4 |
5 | * readblock
6 | * writeblock
7 | * checkpoint
8 |
9 | When a write operation is performed, the change is stored in a separate *journal* file per transaction. When a read operation is performed, it reads from the latest file which contains the block data.
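This read path can be sketched as follows (a simplified version of `readBlockJ` from `TransactionFH.lhs`; the actual implementation also consults a per-journal Bloom filter before probing a journal):

```haskell
readLatest :: Maybe Journal   -- the transaction's own journal, if any
           -> [Journal]       -- valid journals, newest first
           -> FH.FHandle      -- the main database file
           -> BlockNumber
           -> IO BS.ByteString
readLatest own journals db bn = go (maybe journals (: journals) own)
  where
    go []       = FH.readBlock db bn        -- fall back to the database
    go (j : js) = do
        found <- readFromJournal j bn       -- :: IO (Maybe BS.ByteString)
        maybe (go js) return found
```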
10 |
11 | Committing a transaction increments the *file version* of the transaction file and marks its own journal file as valid.
12 |
13 | The set of all valid journal files and the main transaction file together represent the current state of the database at any given point of time.
14 |
15 | A transaction fails/aborts when the readblocks of the current transaction conflict with the blocks changed between the start of the transaction and the current *version* (see the sketch at the end of this section).
16 |
17 | Write operations always go through without any waiting period, and hence they are optimal.
18 | The time complexity of a read operation is proportional to the number of journal files on disk.
19 |
20 | Checkpointing transfers all the data in journal files to the main database file. Hence, frequent checkpointing reduces the overhead of read operations.
21 |
22 | There is a tradeoff between average read performance and average write performance. To maximize read performance, one wants to keep the number of journal files as small as possible and hence run checkpoints frequently. To maximize write performance, one wants to amortize the cost of each checkpoint over as many writes as possible, meaning that one wants to run checkpoints infrequently and let the number of journal files grow as large as possible before each checkpoint. The decision of how often to run checkpoints may therefore vary from one application to another depending on the relative read and write performance requirements of the application.
23 |
24 |
25 |
26 |
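The version-range conflict test can be sketched as below, mirroring `checkFailure` from `TransactionFH.lhs`: using each journal's Bloom filter of written blocks, a transaction is aborted if any block it read *may* have been overwritten since it started. Bloom-filter false positives can abort a transaction needlessly, but a real conflict is never missed:

```haskell
conflicts :: [BF.Bloom BlockNumber]  -- write sets of journals committed
                                     -- between our start version and now
          -> [BlockNumber]           -- blocks this transaction has read
          -> Bool
conflicts writeSets readSet =
    or [ elemB bn ws | ws <- writeSets, bn <- readSet ]
    -- elemB is the Bloom-filter membership test already used in the repo
```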
--------------------------------------------------------------------------------
/src/System/HaskDB/FileHeader.lhs:
--------------------------------------------------------------------------------
1 | > module System.HaskDB.FileHeader where
2 | > import qualified System.HaskDB.FileHandling as FH
3 | > import qualified Data.ByteString as BS
4 | > import System.IO
5 | > import Data.Serialize
6 | > import Data.IORef
7 | >
8 | > -- | Header of the database to store the schema and other related information. Only one thread can operate on the header
9 | > -- at a time. Other threads will be blocked until allowed.
10 | > --
11 | > data Header = Header
12 | >     {fileVersion :: Integer
13 | >     }
14 | > instance Serialize Header where
15 | >     put h = put $ fileVersion h
16 | >     get = do
17 | >         w <- get
18 | >         return $ Header w
19 | >
20 | > -- | Creates an empty header file.
21 | > {-createHeader :: FH.FHandle -> IO () -}
22 | > {-createHeader fh = do -}
23 | >     {-fp <- openFile (getHeaderName fh) WriteMode -}
24 | >     {-hClose fp -}
25 | >
26 | > -- | Change the FileVersion.
27 | > -- TODO : Make read and write together atomic and non failing in case of exception.
28 | > changeFileVersion :: FH.FHandle -> IO ()
29 | > changeFileVersion fh = do
30 | >     atomicModifyIORef (FH.fileVersion fh) (\a -> (a+1,()))
31 | >     {-writeIORef (FH.fileVersion fh) bs -}
32 | >     {-BS.writeFile headerPath (encode $ Header bs)-}
33 | >     {-where -}
34 | >         {-headerPath = getHeaderName fh -}
35 | >
36 | > -- | Get the current version of the file.
37 | > getFileVersion :: FH.FHandle -> IO Integer
38 | > getFileVersion fh = readIORef $ FH.fileVersion fh
39 | >     {-header <- BS.readFile $ getHeaderName fh-}
40 | >     {-case decode header of -}
41 | >         {-Left err -> error err -}
42 | >         {-Right d -> return $ fileVersion d-}
43 | >
44 | >
45 | > -- | Given a file handle, return the header file name
46 | > getHeaderName :: FH.FHandle -> FilePath
47 | > getHeaderName fh = FH.filePath fh ++ ".header"
48 |
49 |
50 |
--------------------------------------------------------------------------------
/HaskDB.cabal:
--------------------------------------------------------------------------------
1 | -- HaskDB.cabal auto-generated by cabal init. For additional options,
2 | -- see
3 | -- http://www.haskell.org/cabal/release/cabal-latest/doc/users-guide/authors.html#pkg-descr.
4 | -- The name of the package.
5 | Name: HaskDB
6 |
7 | -- The package version. See the Haskell package versioning policy
8 | -- (http://www.haskell.org/haskellwiki/Package_versioning_policy) for
9 | -- standards guiding when and how versions should be incremented.
10 | Version: 0.1
11 |
12 | -- A short (one-line) description of the package.
13 | Synopsis: Persistent database in Haskell
14 |
15 | -- A longer description of the package.
16 | -- Description:
17 |
18 | -- The license under which the package is released.
19 | License: BSD3
20 |
21 | -- The file containing the license text.
22 | License-file: LICENSE
23 |
24 | -- The package author(s).
25 | Author: Satvik Chauhan , Pankaj More
26 |
27 | -- An email address to which users can send suggestions, bug reports,
28 | -- and patches.
29 | Maintainer: mystic.satvik@gmail.com
30 |
31 | -- A copyright notice.
32 | -- Copyright:
33 |
34 | Category: System
35 |
36 | Build-type: Simple
37 |
38 | -- Extra files to be distributed with the package, such as examples or
39 | -- a README.
40 | -- Extra-source-files:
41 |
42 | -- Constraint on the version of Cabal needed to build this package.
43 | Cabal-version: >=1.2
44 |
45 |
46 | Library
47 |     -- Modules exported by the library.
48 |     Exposed-modules: System.HaskDB.FileHandling, System.HaskDB.Journal, System.HaskDB.Transactions, System.HaskDB.Fsync
49 |     Hs-Source-Dirs : src/
50 |
51 |     -- Packages needed in order to build this package.
52 |     Build-depends: base >= 4 && < 5, cereal >= 0.3.5, bytestring, text, containers, random >= 1.0, directory >= 1.1, dequeue >= 0.1.5, bloomfilter >= 1.2.6, unix
53 |
54 |     -- Modules not exported by this package.
55 |     -- Other-modules:
56 |
57 |     -- Extra tools (e.g. alex, hsc2hs, ...) needed to build the source.
58 |     -- Build-tools:
59 |
60 |
--------------------------------------------------------------------------------
/src/System/HaskDB/lock-based-implementation.md:
--------------------------------------------------------------------------------
1 | Most of the existing implementations use locks on files for concurrency control.
2 |
3 | # A simplified model of database file locking
4 |
5 | From the point of view of a single process, a database file can be in one of five locking states:
6 |
7 | * **UNLOCKED** : No locks are held on the database. The database may be neither read nor written. Any internally cached data is considered suspect and subject to verification against the database file before being used. Other processes can read or write the database as their own locking states permit. This is the default state.
8 | * **SHARED** : The database may be read but not written.
Any number of processes can hold SHARED locks at the same time, hence there can be many simultaneous readers. But no other thread or process is allowed to write to the database file while one or more SHARED locks are active.
9 | * **RESERVED** : A RESERVED lock means that the process is planning on writing to the database file at some point in the future but that it is currently just reading from the file. Only a single RESERVED lock may be active at one time, though multiple SHARED locks can coexist with a single RESERVED lock. RESERVED differs from PENDING in that new SHARED locks can be acquired while there is a RESERVED lock.
10 | * **PENDING** : A PENDING lock means that the process holding the lock wants to write to the database as soon as possible and is just waiting on all current SHARED locks to clear so that it can get an EXCLUSIVE lock. No new SHARED locks are permitted against the database if a PENDING lock is active, though existing SHARED locks are allowed to continue.
11 | * **EXCLUSIVE** : An EXCLUSIVE lock is needed in order to write to the database file. Only one EXCLUSIVE lock is allowed on the file and no other locks of any kind are allowed to coexist with an EXCLUSIVE lock.
12 |
13 | An example of a database engine that follows this model is SQLite.
14 | Multiple readers can read from the database file.
15 | Only a single writer can write to the database at any given time, and all the readers have to wait till the write is complete.
16 | Hence such a model is good for heavy read-based applications but inefficient when multiple write transactions try to modify the database concurrently. The rules above can be restated compactly in code, as shown below.
17 |
18 |
19 |
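A sketch of the two acquisition rules that matter most (`LockState` and the two predicates are illustrative names, not part of this repository):

```haskell
data LockState = Unlocked | Shared | Reserved | Pending | Exclusive
    deriving (Eq, Show)

-- May another process acquire a *new* SHARED lock while `held` is active?
-- RESERVED still admits new readers; PENDING and EXCLUSIVE refuse them.
newReaderAllowed :: LockState -> Bool
newReaderAllowed held = held `elem` [Unlocked, Shared, Reserved]

-- May a process acquire EXCLUSIVE, given the locks everyone else holds?
-- EXCLUSIVE coexists with no other lock of any kind.
writerAllowed :: [LockState] -> Bool
writerAllowed = all (== Unlocked)
```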
--------------------------------------------------------------------------------
/src/System/HaskDB/Introduction.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 |
3 | A transaction is a group of operations combined into a logical unit of work. Developers use transactions to control and maintain the consistency and integrity of each action in a transaction, despite errors that might occur in the system either due to concurrency or hardware failure. In a database context, a transaction on a database is considered to be a set of operations performed such that the database is in a consistent state before and after the transaction; in case of failure the system must provide ways to roll back partial transactions to bring the database back into a consistent state. Transactions in the database environment mainly serve two purposes:
4 |
5 | 1. To provide reliable units of work that allow correct recovery from failures and keep a database consistent even in cases of system failure, when execution stops (completely or partially) and many operations upon a database remain uncompleted, with unclear status.
6 | 2. To provide isolation between programs accessing a database concurrently. If this isolation is not provided, the programs' outcomes are possibly erroneous.
7 |
8 | Transactions provide an "all-or-nothing" proposition, stating that each work-unit performed in a database must either complete in its entirety or have no effect whatsoever. Further, the system must isolate each transaction from other transactions, results must conform to existing constraints in the database, and transactions that complete successfully must get written to durable storage. Thus, even in case of hardware failure, a transaction once committed must persist. A transaction is expected to satisfy the ACID guarantees.
9 |
10 | * **Atomicity** means a transaction can end in only two ways: either successfully, in which case all its effects are written to durable storage and persist between power failures, or unsuccessfully, in which case it has no effect, and it can be assumed that this transaction never happened.
11 | * **Consistency** just means that a transaction is written correctly, so that when it completes successfully, the database is in a consistent state.
12 | * **Isolation** means that the transaction appears to execute completely alone, even if, in fact, other transactions are running simultaneously. In other words, a transaction always sees a consistent snapshot of the database and is totally unaware of the changes made by other transactions which are running concurrently with it.
13 | * **Durability** means that a successful transaction's changes are permanent and persist across all possible sorts of failures. In practice this means being written to disk.
14 |
15 |
16 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Backend_test.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE FlexibleInstances #-}
2 | import System.HaskDB.Backend
3 | import Test.QuickCheck
4 | import Control.Monad
5 | import qualified Data.Map as M
6 | import Test.HUnit
7 | import Test.Framework (defaultMain, testGroup)
8 | import Test.Framework.Providers.QuickCheck2 (testProperty)
9 |
10 | instance Arbitrary Mode where
11 |     arbitrary = oneof $ map return [Write , ReadWrite , Read]
12 |
13 | instance (Ord a, Arbitrary a,Arbitrary b) => Arbitrary (M.Map a b) where
14 |     arbitrary = liftM M.fromList (listOf1 arbitrary)
15 |
16 | instance Arbitrary a => Arbitrary (File a) where
17 |     arbitrary = liftM2 File arbitrary arbitrary
18 |
19 | instance Arbitrary a => Arbitrary (TestDisk a) where
20 |     arbitrary = do
21 |         b <- arbitrary :: Arbitrary b => Gen (M.Map FilePath (File b))
22 |         let list = M.toList b
23 |         len <- choose (1,length list)
24 |         subset <- takeSubset len list
25 |         size <- arbitrary :: Gen Int
26 |         op <- openFiles (map fst subset) M.empty
27 |         return $ TestDisk b b size op
28 |         where
29 |             takeSubset :: Int -> [a] -> Gen [a]
30 |             takeSubset 0 list = return list
31 |             takeSubset n list = do
32 |                 t <- oneof $ map return list
33 |                 rest <- takeSubset (n-1) list
34 |                 return (t:rest)
35 |             openFiles :: [FilePath] -> M.Map FilePath Mode -> Gen (M.Map FilePath Mode)
36 |             openFiles [] f = return f
37 |             openFiles (x:xs) f = do
38 |                 md <- arbitrary :: Gen Mode
39 |                 openFiles xs (M.insert x md f)
40 |
41 |
42 | -- closing a file twice should be the same as closing it once
43 | prop_double_close :: TestDisk String -> String -> Bool
44 | prop_double_close t s = (closeFile (closeFile t s) s) == closeFile t s
45 |
46 | -- writing and then reading the same block should return the same value
47 | prop_readwrite :: TestDisk String -> String -> Int -> String -> Bool
48 | prop_readwrite t fp bn val = readB (writeB (openFile t fp ReadWrite) fp bn val) fp bn buffers == Just val
49 |
50 | -- after syncing, the block should be on disk
51 | prop_sync :: TestDisk String -> String -> Int -> String -> Bool
52 | prop_sync t fp bn val = readB (flushBuffer (writeB (openFile t fp ReadWrite) fp bn val) fp) fp bn disk == Just val
53 |
54 | -- closing a file automatically syncs
55 | prop_close :: TestDisk String -> String -> Int -> String -> Bool
56 | prop_close t fp bn val = readB (closeFile (writeB (openFile t fp ReadWrite) fp bn val) fp) fp bn disk == Just val
57 |
58 | main = defaultMain tests
59 | tests = [
60 |     testGroup "Sorting Group 1" [
61 |         testProperty "double close " prop_double_close,
62 |         testProperty "read and write " prop_readwrite,
63 |         testProperty "sync" prop_sync,
64 |         testProperty "close" prop_close
65 |     ]]
66 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Backend.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE FunctionalDependencies #-}
2 | {-# LANGUAGE FlexibleInstances #-}
3 | {-# LANGUAGE FlexibleContexts #-}
4 | {-# LANGUAGE MultiParamTypeClasses #-}
5 | {-# LANGUAGE TypeFamilies #-}
6 | module System.HaskDB.Backend where
7 | import qualified Data.Map as M
8 | import Control.Monad.State
9 | import Control.Applicative
10 | import Data.Maybe
11 |
12 |
13 | class (Show a,Monad m) => Backend b m a | b -> m where
14 |     data Handle :: * -> *
15 |     type BlockNumber :: * -- I want BlockNumber to have constraint Eq
16 |     open :: FilePath -> Mode -> m (Handle b)
17 |     close :: Handle b -> m ()
18 |     readBlock :: (Eq BlockNumber) => Handle b -> BlockNumber -> m a
19 |     writeBlock :: (Eq BlockNumber) => Handle b -> BlockNumber -> a -> m ()
20 |     sync :: Handle b -> m ()
21 |
22 |
23 |
24 |
25 | instance Show a => Backend (TestDisk a) (State (TestDisk a)) a where
26 |     data Handle (TestDisk a) = Handle FilePath
27 |     type BlockNumber = Int
28 |     open fp m = do
29 |         disk <- get
30 |         put $ openFile disk fp m
31 |         return $ Handle fp
32 |     close (Handle fp) = do
33 |         disk <- get
34 |         put $ closeFile disk fp
35 |     readBlock (Handle fp) bn = do
36 |         t <- get
37 |         case readB t fp bn buffers of
38 |             Nothing -> error "Block Not Found"
39 |             Just a -> return a
40 |     writeBlock (Handle fp) bn a = do
41 |         t <- get
42 |         put $ writeB t fp bn a
43 |     sync (Handle fp) = do
44 |         t <- get
45 |         put $ flushBuffer t fp
46 |
47 | -- Pure functions to simulate a file system. This implementation only supports one handle per file.
48 | -- Pessimistic implementation, so data is not written to the simulated disk until sync is called.
49 | data TestDisk a = TestDisk {
50 |     disk :: M.Map FilePath (File a)
51 |     , buffers :: M.Map FilePath (File a)
52 |     , bufferSize :: Int -- Current buffer Size
53 |     , openFiles :: M.Map FilePath Mode
54 |     } deriving (Eq,Show)
55 |
56 | data File a = File {
57 |     blocks :: M.Map Int a
58 |     , size :: Int
59 |     } deriving (Eq, Show)
60 |
61 | data Mode = ReadWrite | Read | Write deriving (Eq,Show)
62 |
63 | -- Opens the file. If the file is not present then creates it. Also creates the buffer space.
64 | openFile :: TestDisk a -> FilePath -> Mode -> TestDisk a
65 | openFile t fp md = let
66 |     d = if M.member fp (disk t)
67 |         then t
68 |         else t { disk = M.insert fp (File M.empty 0) (disk t)
69 |                , buffers = M.insert fp (File M.empty 0) (buffers t)}
70 |     opfs = openFiles d
71 |     newOpfs = if M.member fp opfs then opfs else M.insert fp md opfs
72 |     in d {openFiles = newOpfs}
73 |
74 | -- Flushes the buffer and closes the handle.
75 | closeFile :: TestDisk a -> FilePath -> TestDisk a
76 | closeFile t fp = let opfs = openFiles t
77 |                      d = if M.member fp (disk t) && M.member fp (buffers t) then flushBuffer t fp else t
78 |                      newOpfs = M.delete fp opfs
79 |                  in d {openFiles = newOpfs , buffers = M.delete fp (buffers t)}
80 |
81 | -- Writes the buffers of the given handle to the disk
82 | flushBuffer :: TestDisk a -> FilePath -> TestDisk a
83 | flushBuffer t fp = let file = M.findWithDefault undefined fp (disk t) -- Sure that undefined will never be called
84 |                        fsize = size file
85 |                        buffer = M.findWithDefault undefined fp (buffers t)
86 |                        bsize = size buffer
87 |                        blks = M.union (blocks buffer) (blocks file)
88 |                    in t { disk = M.insert fp (File blks (fsize + bsize)) (disk t)
89 |                         , buffers = M.insert fp (File M.empty 0) (buffers t)}
90 |
91 |
92 | isClosed :: TestDisk a -> FilePath -> Bool
93 | isClosed t fp = not $ M.member fp (openFiles t) -- a file is closed when it has no entry in openFiles
94 |
95 | -- Reads from the buffers and if not present then reads from the disk
96 | readB :: TestDisk a -> FilePath -> Int -> (TestDisk a -> M.Map FilePath (File a)) -> Maybe a
97 | readB t fp bn from = case join (M.lookup bn . blocks <$> M.lookup fp (from t)) of
98 |     Nothing -> join (M.lookup bn . blocks <$> M.lookup fp (disk t)) -- fall through to the disk map; a plain lookup here gives Nothing for a missing block instead of recursing forever
99 |     Just a -> Just a
100 |
101 |
102 | -- Writes to the buffers
103 | writeB :: TestDisk a -> FilePath -> Int -> a -> TestDisk a
104 | writeB t fp bn v = let file = (\(File b s) -> File (M.insert bn v b) s) <$> M.lookup fp (buffers t)
105 |                    in maybe t (\a -> t {buffers = M.insert fp a (buffers t) , bufferSize = bufferSize t + (sizeOf a)}) file
106 |
107 | sizeOf _ = 1
108 |
109 |
110 |
111 |
112 |
113 |
114 |
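-- Hypothetical walk-through (not part of the original module), mirroring
-- prop_sync from Backend_test.hs: a write first lands in the per-file
-- buffer and appears in the `disk` map only after flushBuffer.
demoSync :: (Maybe String, Maybe String)
demoSync =
    let d0 = openFile (TestDisk M.empty M.empty 0 M.empty) "f" ReadWrite
        d1 = writeB d0 "f" 0 "hello"
    in  ( readB d1 "f" 0 disk                      -- Nothing: still buffered
        , readB (flushBuffer d1 "f") "f" 0 disk )  -- Just "hello": on disk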
--------------------------------------------------------------------------------
/src/System/HaskDB/FileHandling.lhs:
--------------------------------------------------------------------------------
1 | > module System.HaskDB.FileHandling where
2 | >
3 | > import System.IO
4 | > import GHC.Word
5 | > import qualified Data.ByteString as BS
6 | > import qualified Data.ByteString.Char8 as BSC
7 | > import Control.Concurrent
8 | > import Control.Applicative
9 | > import System.HaskDB.Fsync
10 | > import Data.IORef
11 | > -- | New file handle, with the block size in bytes stored in the handle.
12 | > -- synchVar is used to provide atomicity to read and write operations.
13 | > data FHandle = FHandle {
14 | >     fileVersion :: IORef (Integer) ,
15 | >     journalId :: IORef (Integer) ,
16 | >     filePath :: FilePath ,
17 | >     synchVar :: MVar () ,
18 | >     blockSize :: Int ,
19 | >     handle :: Handle
20 | >     }
21 | >
22 | > -- | Opens the file given path, mode and block size, and returns the file handle
23 | > openF :: FilePath -> IOMode -> Int -> IO FHandle
24 | > openF fp m bs = do
25 | >     {-print ("opening handle" ++ fp)-}
26 | >     p <- openBinaryFile fp m
27 | >     sync <- newMVar ()
28 | >     ver <- newIORef 0
29 | >     jid <- newIORef 0
30 | >     return $ FHandle ver jid fp sync bs p
31 | >
32 | >
33 | > -- | Closes the file handle
34 | > closeF :: FHandle -> IO ()
35 | > closeF fh = do
36 | >     {-print ("closing handle" ++ (filePath fh))-}
37 | >     hClose $ handle fh
38 | >
39 | > -- | Given the file handle and block number, reads the block and returns it
40 | > readBlock :: FHandle -> Integer -> IO BS.ByteString
41 | > readBlock fh i = do
42 | >     isCl <- hIsClosed (handle fh)
43 | >     if isCl
44 | >         then
45 | >             print ("Handle is Closed" ++ (filePath fh))
46 | >         else return ()
47 | >     _ <- takeMVar (synchVar fh)
48 | >     hSeek (handle fh) AbsoluteSeek $ (toInteger $ blockSize fh)*i
49 | >     ret <- fst <$> BS.foldr (\a (ls,b) -> if (a /= 000) then (BS.cons a ls,True) else ( if b then (BS.cons a ls,b) else (ls,b)) ) (BS.empty,False) <$> BS.hGet (handle fh) (blockSize fh) -- filters out trailing \NUL characters
50 | >     putMVar (synchVar fh) ()
51 | >     return ret
52 | >
53 | > -- | Given the file handle, block number, and data to be written as a ByteString, writes the given block. Pads with \NUL if the data is less than the block size.
54 | > writeBlock :: FHandle -> Integer -> BS.ByteString -> IO ()
55 | > writeBlock fh i bs = do
56 | >     _ <- takeMVar (synchVar fh)
57 | >     currentPos <- hTell (handle fh)
58 | >     hSeek (handle fh) AbsoluteSeek $ (toInteger $ blockSize fh)*i
59 | >     BS.hPut (handle fh) (BS.take (blockSize fh) (BS.append bs (BS.pack (take (blockSize fh) $ cycle [000 :: GHC.Word.Word8] ))))
60 | >     hSeek (handle fh) AbsoluteSeek currentPos -- Necessary because concurrent use of appendBlock and writeBlock was resulting in overwriting of the block next to where writeBlock was called with appendBlock.
61 | >     putMVar (synchVar fh) ()
62 | >
63 | > -- | Writes all the data. Note that writeBlock truncates data if its size is more than the given block. This will delete all the previous data present in the file.
64 | > writeAll :: FHandle -> BS.ByteString -> IO ()
65 | > writeAll fh bs = do
66 | >     hSeek (handle fh) AbsoluteSeek 0
67 | >     BS.hPut (handle fh) bs
68 | >     size <- hTell (handle fh)
69 | >     hSetFileSize (handle fh) size
70 | >
71 | > readAll :: FHandle -> IO BS.ByteString
72 | > readAll fh = do
73 | >     hSeek (handle fh) AbsoluteSeek 0
74 | >     BS.hGetContents (handle fh)
75 | >
76 | > -- | Appends a block at the end of the file
77 | > appendBlock :: FHandle -> BS.ByteString -> IO Integer
78 | > appendBlock fh bs = do
79 | >     hSeek (handle fh) SeekFromEnd 0
80 | >     currentPos <- hTell (handle fh)
81 | >     BS.hPut (handle fh) (BS.take (blockSize fh) (BS.append bs (BS.pack (take (blockSize fh) $ cycle [000 :: GHC.Word.Word8] ))))
82 | >     return.floor $ (fromIntegral currentPos) / (fromIntegral $ blockSize fh)
83 | >
84 | > -- | Reads the last block of the file and removes it from the file
85 | > getLastBlock :: FHandle -> IO (Maybe BS.ByteString)
86 | > getLastBlock fh = do
87 | >     fs <- hFileSize (handle fh)
88 | >     if fs > 0
89 | >         then do
90 | >             hSeek (handle fh) SeekFromEnd (-(fromIntegral $ blockSize fh))
91 | >             bs <- BS.hGet (handle fh) (blockSize fh)
92 | >             {-bs <- BS.hGet (handle fh) 32-}
93 | >             hSetFileSize (handle fh) (fs - (fromIntegral $ blockSize fh))
94 | >             return $ Just bs
95 | >         else
96 | >             return Nothing
97 | >
98 | >
99 | >
100 | >
101 | > -- | Flushes the buffer to the hard disk
102 | > flushBuffer :: FHandle -> IO ()
103 | > flushBuffer fh = sync $ handle fh
104 | >
105 | > -- | Zeroes out the file.
106 | > truncateF :: FHandle -> IO ()
107 | > truncateF fh = hSetFileSize (handle fh) 0
108 | >
109 | > test = do
110 | >     c <- openF "abc.b" WriteMode 1024 -- Truncates to zero length file
111 | >     closeF c
112 | >     p <- openF "abc.b" ReadWriteMode 1024
113 | >     forkIO $ do
114 | >         sequence_ $ map (\s -> appendBlock p (BSC.pack (show s))) [1..100]
115 | >     forkIO $ do
116 | >         sequence_ $ map (\s -> appendBlock p (BSC.pack (show s))) [101..200]
117 | >     appendBlock p (BSC.pack "Hello How are you" )
118 | >     writeBlock p 0 (BSC.pack "First Block")
119 | >     bs <- appendBlock p (BSC.pack "check")
120 | >     print bs
121 | >     threadDelay 1000 -- To keep the thread blocked and not close the handle before the data is written.
122 | >     flushBuffer p
123 | >     closeF p
124 | >     p <- openF "abc.b" ReadMode 1024
125 | >     x <- sequence $ map (readBlock p) [0..500]
126 | >     print x
127 | >     closeF p
128 |
129 |
130 |
--------------------------------------------------------------------------------
/src/System/HaskDB/TransactionFH.lhs:
--------------------------------------------------------------------------------
1 | > module System.HaskDB.TransactionFH where
2 | >
3 | > import qualified Data.ByteString as BS
4 | > import qualified System.HaskDB.FileHandling as FH
5 | > import qualified System.HaskDB.FileHeader as FHD
6 | > import qualified Data.Dequeue as DQ
7 | > import Data.Maybe
8 | > import Data.Dequeue
9 | > import Data.BloomFilter.Easy
10 | > import Data.IORef
11 | > import qualified Data.BloomFilter as BF
12 | > import System.HaskDB.Journal as JU
13 | >
14 | > import Data.Unique
15 | > import Data.Word
16 | > import Data.Serialize
17 | > import Control.Concurrent
18 | > import Control.Applicative
19 | > import System.IO
20 | >
21 | > data BlockData = BlockData BS.ByteString
22 | > type BlockNumber = Integer
23 | > data LogDescriptor = LogDescriptor
24 | > type FileInformation = FH.FHandle
25 | > type FileVersion = Integer
26 | >
27 | > readBlock = FH.readBlock
28 | > writeBlock = FH.writeBlock
29 | >
30 | > data Transaction = Transaction {
31 | >     rBlocks :: BlockList
32 | >     , tType :: TransactionType
33 | >     }
34 | >
35 | > data TransactionType = ReadOnly | ReadWrite {
36 | >     bloom :: BF.Bloom BlockNumber
37 | >     , journal :: Journal
38 | >     }
39 | >
40 | >
41 | > type JBloom = BF.Bloom BlockNumber
42 | > data JInfo = JInfo
43 | >     { getJournal :: JU.Journal
44 | >     , getBloomFilter :: JBloom
45 | >     , getfv :: Integer
46 | >     }
47 | >
48 | > -- TODO : A heap implementation of the transactions queue would be much better, as the only operations to support are
49 | > -- getMinFileVersion, inserting a transaction, and deleting a transaction.
50 | > data TFile = TFile {
51 | >     fHandle :: FH.FHandle -- handle of the database file
52 | >     , commitSynch :: MVar () -- held while a transaction commits, so commits are serialized
53 | >     , jQueue :: IORef (DQ.BankersDequeue JInfo) -- info about the valid journals on disk
54 | >     , failedQueue :: IORef (DQ.BankersDequeue (Integer,FileVersion)) -- (id, version) of failed transactions
55 | >     , commitedQueue :: IORef (DQ.BankersDequeue JInfo)
56 | >     , transactions :: IORef (DQ.BankersDequeue (Integer,FileVersion)) -- (id, start version) of running transactions
57 | >     }
58 | >
59 | > openTF fpath = do
60 | >     handle <- FH.openF fpath ReadWriteMode 1024
61 | >     cmVar <- newMVar ()
62 | >     jQ <- newIORef DQ.empty
63 | >     fQ <- newIORef DQ.empty
64 | >     cQ <- newIORef DQ.empty -- TFile has six fields, so the commited queue needs its own IORef too
65 | >     tQ <- newIORef DQ.empty
66 | >     return $ TFile handle cmVar jQ fQ cQ tQ
67 | > closeTF tFile = do
68 | >     FH.closeF $ fHandle tFile
69 | >     q <- readIORef $ jQueue tFile
70 | >     closeAll q
71 | >     where
72 | >         closeAll q = do
73 | >             let (a,b) = popFront q
74 | >             case a of
75 | >                 Nothing -> return ()
76 | >                 Just a -> do JU.closeJournal $ getJournal a
77 | >                              closeAll b
78 |
79 |
80 |
81 |
82 |
83 | > checkInBloomFilter bf bn = elemB bn bf
84 | >
85 | > readBlockFromOwnJournal :: TFile -> BlockNumber -> Transaction -> IO (Maybe BS.ByteString)
86 | > readBlockFromOwnJournal tf bn d = do
87 | >     case (tType d) of
88 | >         ReadOnly -> return Nothing
89 | >         ReadWrite _ j -> JU.readFromJournal j bn
90 | >
91 | > -- | First, if the transaction has its own journal, then it should read from that;
92 | > -- else read the block from the most recent journal that contains it;
93 | > -- else read it from the database
94 | >
95 | >
96 | > readBlockJ :: TFile -> BlockNumber -> Transaction -> IO BS.ByteString
97 | > readBlockJ tf bn d = do
98 | >     r <- readBlockFromOwnJournal tf bn d
99 | >     case r of
100 | >         Just x -> do
101 | >             return x
102 | >         Nothing -> do
103 | >             q <- readIORef $ jQueue tf
104 | >             if DQ.null q then FH.readBlock (fHandle tf) bn else func q bn -- no journals: read straight from the database file
105 | >     where
106 | >         func q bn = case popFront q of
107 | >             (Just jInfo, q') ->
108 | >                 case checkInBloomFilter (getBloomFilter jInfo) bn of
109 | >                     False -> func q' bn
110 | >                     True -> do
111 | >                         d <- JU.readFromJournal (getJournal jInfo) bn
112 | >                         case d of
113 | >                             Just x -> return x
114 | >                             Nothing -> func q' bn
115 | >             (Nothing , _) -> do
116 | >                 FH.readBlock (fHandle tf) bn -- read from database file
117 | >
118 | > -- | Returns True if there is a block from bli which is probably in the list of bloom filters
119 | > checkF :: [JBloom] -> BlockList -> IO Bool
120 | > {-checkF jbl bli = any id [elemB bn jb | jb <- jbl , bn <- bli]-}
121 | > checkF [] bli = return False
122 | > checkF js bli = do
123 | >     --print $ checkOneBlock js 0
124 | >     (f,rest) <- getBlock bli
125 | >     print f
126 | >     --print $ "Bloom List " ++ (show js)
127 | >     --print $ "Block List " ++ (show (blocks bli))
128 | >     case f of
129 | >         Just x -> if checkOneBlock js (toInteger x) then return True else checkF js rest
130 | >         Nothing -> return False
131 | >     where
132 | >         checkOneBlock [] bl = False
133 | >         checkOneBlock (f:fs) bl = if elemB bl f then True else checkOneBlock fs bl
134 | >
135 | >
136 | > getJInfoList :: JId -> JId -> DQ.BankersDequeue JInfo -> [JInfo]
137 | > getJInfoList id1 id2 q = takeWhile (\q -> (getfv q) /= id1) $ takeBack (Data.Dequeue.length q) q
138 | >
139 | > -- | checkFailure returns True when the transaction has to fail.
140 | > -- The transaction will fail only in the following cases : -
141 | > -- 1. The set of readBlocks of the current transaction and writeBlocks of the failed transaction is not disjoint
142 | > -- 2.
The set of the readBlocks of current transaction and the blocks changed in between old fileversion and new fileversion is not disjoint 143 | > -- TODO:Union of BloomFilter 144 | > checkSuccess :: FileVersion -> FileVersion -> TFile -> BlockList -> IO Bool 145 | > checkSuccess oldfv newfv tf bli = not <$> checkFailure oldfv newfv tf bli 146 | > checkFailure :: FileVersion 147 | > -> FileVersion 148 | > -> TFile 149 | > -> BlockList 150 | > -> IO Bool 151 | > checkFailure oldfv newfv tf bli = do 152 | > {-fq <- readIORef (failedQueue tf)-} 153 | > {-let fli = map snd (takeFront (Data.Dequeue.length fq) fq)-} 154 | > {-case checkF fli bli of-} 155 | > {-True -> return True-} 156 | > {-False -> do-} 157 | > q <- readIORef (jQueue tf) 158 | > let jli = getJInfoList oldfv newfv q 159 | > let jbl = map (getBloomFilter) jli 160 | > print $ map getfv jli 161 | > f <- checkF jbl bli 162 | > print f 163 | > return f 164 | > 165 | > -- | Interface to maintain list of read Blocks 166 | > -- Only that much data is kept in the memory which can be fit in a file 167 | > -- Rest is written on to the hard disk . 168 | > 169 | > -- Size of empty list is 8 bytes 170 | > data BlockList = BlockList { 171 | > blocks :: [Word64] 172 | > , size :: Int 173 | > , transH :: FH.FHandle 174 | > } 175 | > 176 | > addBlock :: Word64 -> BlockList -> IO BlockList 177 | > addBlock b bl = do 178 | > let s = size bl 179 | > let blkSize = FH.blockSize $ transH bl 180 | > let blks = blocks bl 181 | > if s + 8 > blkSize 182 | > then do 183 | > FH.appendBlock (transH bl) $ encode blks 184 | > return $ BlockList [b] 16 (transH bl) 185 | > else 186 | > return $ BlockList (b:blks) (s+8) (transH bl) 187 | > 188 | > getBlock :: BlockList -> IO (Maybe Word64,BlockList) 189 | > getBlock bl = do 190 | > let s = size bl 191 | > let blkSize = FH.blockSize $ transH bl 192 | > let blks = blocks bl 193 | > if s == 8 194 | > then do 195 | > bs <- FH.getLastBlock $ transH bl 196 | > case bs of 197 | > Just bs -> do 198 | > let ls = decode bs 199 | > case ls of 200 | > Right (x:xs) -> return (Just x,BlockList xs ((1 + Prelude.length xs)*8) (transH bl)) 201 | > Left err -> return (Nothing , BlockList [] 8 (transH bl)) 202 | > Nothing -> return (Nothing , BlockList [] 8 (transH bl)) 203 | > else do 204 | > let (b:bs) = blks 205 | > return (Just b , BlockList bs (s-8) (transH bl)) 206 | 207 | 208 | -------------------------------------------------------------------------------- /src/System/HaskDB/Journal.lhs: -------------------------------------------------------------------------------- 1 | > module System.HaskDB.Journal where 2 | > 3 | > import System.IO 4 | > import System.Directory 5 | > import System.Random 6 | > import Control.Applicative 7 | > import Data.Map as Map 8 | > import Data.Serialize 9 | > import Data.Text 10 | > import Data.Maybe 11 | > import Data.ByteString as BS 12 | > import qualified Data.ByteString.Char8 as BSC 13 | > import qualified System.HaskDB.FileHandling as FH 14 | > import System.HaskDB.FileHeader 15 | > import Data.IORef 16 | 17 | Whenever a block is to be written to the main Transaction File , we can essentially follow one of the following two approaches: - 18 | 19 | * Copy the original block to the Journal and try writing the changed block to the main file. If the transaction goes through successfully , we can discard the Journal . This approach is simpler to implement and we started with this approach in the beginning. But we came to realize that this would be highly inefficient . 
20 | * Our current approach is based on an idea similar to Write Ahead Logging in Sqlite. Instead of writing the "old data" to journal , we write the new data first to the Journal and then eventually move it to the main file. But this approach means the main file alone is no more the only place to read data from. The main file with all the journals together really give us the latest picture of our data. 21 | 22 | A Journal is a temporary file defined per transaction.It is used to keep track of the blocks changed by a transaction. It stores the new data that needs to be eventually written to the main database. 23 | 24 | Following are some trivial types for journal 25 | 26 | > type JHandle = FH.FHandle 27 | > type OldBlock = Integer 28 | > type JId = Integer 29 | 30 | A journal object requires a unique identifier , file handles of the header 31 | file , journal file and the database file and an index storing a mapping of block number in the 32 | database to block number in journal file. 33 | 34 | > data Journal = Journal { journalID :: JId -- filename prefix of Journal 35 | > , hHandle :: FH.FHandle -- Handle for the header file 36 | > , jHandle :: FH.FHandle -- Handle for the Journal file 37 | > , dHandle :: FH.FHandle -- Handle for the database 38 | > , oldBlocks :: Map Integer Integer 39 | > -- Map from block number in database to block number 40 | > -- of the journal 41 | > } 42 | 43 | Closing the Journal object requires closing the handle of both the header file 44 | as well as the journal file. 45 | 46 | > closeJournal j = do 47 | > FH.closeF $ jHandle j 48 | > FH.closeF $ hHandle j 49 | 50 | > -- | Find all the journals present on the disk 51 | > findJournals :: FilePath -> IO [JId] 52 | > findJournals dp = do 53 | > files <- getDirectoryContents dp 54 | > return [ read (Data.Text.unpack id) 55 | > | file <- files 56 | > , id <- maybeToList (stripSuffix (Data.Text.pack ".header") 57 | > (Data.Text.pack file)) 58 | > ] 59 | > -- return a list of JIds 60 | > 61 | > -- | Create a new unique Journal file 62 | > newJournal :: FH.FHandle -> IO Journal 63 | > newJournal dh = do 64 | > gen <- newStdGen 65 | > jid <- atomicModifyIORef (FH.journalId dh) (\a -> (a+1,a)) 66 | > let filename = show jid 67 | > let header = filename ++ ".header" 68 | > let journal = filename ++ ".journal" 69 | > headerHandle <- FH.openF header ReadWriteMode 1024 70 | > journalHandle <- FH.openF journal ReadWriteMode 1024 71 | > let fJournal = Journal { journalID = jid 72 | > , hHandle = headerHandle 73 | > , jHandle = journalHandle 74 | > , dHandle = dh 75 | > , oldBlocks = Map.empty 76 | > } 77 | > return fJournal 78 | > 79 | > -- | Build a Journal from the journal file on disk 80 | > buildJournal :: JId -> FH.FHandle -> IO Journal 81 | > buildJournal id dh = do 82 | > let filename = show id 83 | > let header = filename ++ ".header" 84 | > let journal = filename ++ ".journal" 85 | > headerHandle <- FH.openF header ReadWriteMode 1024 86 | > journalHandle <- FH.openF journal ReadWriteMode 1024 87 | > retreivedMap <- readHeader headerHandle 88 | > let fJournal = Journal { journalID = id 89 | > , hHandle = headerHandle 90 | > , jHandle = journalHandle 91 | > , dHandle = dh 92 | > , oldBlocks = retreivedMap 93 | > } 94 | > return fJournal 95 | > 96 | > 97 | > -- | Reads all the blocks in a Journal 98 | > readAllBlocksFrom :: Journal -> IO [BS.ByteString] 99 | > readAllBlocksFrom j = undefined 100 | > 101 | > -- | Given a block number in the database file , read it from the Journal 102 | > readFromJournal :: Journal -> 
Integer -> IO (Maybe BS.ByteString) 103 | > readFromJournal j bn = do 104 | > let l = Map.lookup bn (oldBlocks j) 105 | > case l of 106 | > Just val -> Just <$> FH.readBlock (jHandle j) val 107 | > Nothing -> return Nothing 108 | > 109 | > -- | Writes the header to the header file 110 | > writeHeader :: Journal -> IO () 111 | > writeHeader j = do 112 | > let s = encode (oldBlocks j) 113 | > FH.writeAll (hHandle j) s 114 | > 115 | > -- | Reads the header information from the header file 116 | > readHeader :: FH.FHandle -> IO (Map Integer Integer) 117 | > readHeader fh = do 118 | > val <- FH.readAll fh 119 | > let (Right m) = decode (val) --Either String a 120 | > return m 121 | > 122 | > -- | Write to a journal given block number and blockData 123 | > writeToJournal :: Journal -> Integer -> BS.ByteString -> IO Journal 124 | > writeToJournal j bn bd = do 125 | > val <- FH.appendBlock (jHandle j) bd 126 | > let newMap = insert bn val (oldBlocks j) 127 | > let fJournal = Journal { journalID = journalID j 128 | > , hHandle = hHandle j 129 | > , jHandle = jHandle j 130 | > , dHandle = dHandle j 131 | > , oldBlocks = newMap 132 | > } 133 | > writeHeader fJournal 134 | > return fJournal 135 | 136 | > -- | To zero out the journal file 137 | > resetJournal :: Journal -> IO () 138 | > resetJournal = FH.truncateF . jHandle 139 | > 140 | > -- | Commit the Journal by writing the new FileVersion to its header 141 | > -- change this later 142 | > commitJournal :: Journal -> IO () 143 | > commitJournal j = changeFileVersion (dHandle j) 144 | > 145 | > 146 | > -- | Replay the data from the Journal to bring back the database into a consistent 147 | > -- state in case of a power failure 148 | > -- Read every block from the journal and write to the database 149 | > replayJournal :: Journal -> IO () 150 | > replayJournal j = do 151 | > let li = toAscList $ oldBlocks j --potential speedup if ascending? 152 | > sequence_ $ Prelude.map readAndWrite li 153 | > where 154 | > readAndWrite :: (Integer,Integer) -> IO() 155 | > readAndWrite (bn,jbn) = do 156 | > maybebd <- readFromJournal j bn 157 | > case maybebd of 158 | > Just bd -> FH.writeBlock (dHandle j) bn bd 159 | > Nothing -> return () 160 | 161 | > test = do 162 | > d <- FH.openF "abc.b" ReadWriteMode 1024 -- Truncates to zero length file 163 | > j <- newJournal d 164 | > k <- writeToJournal j 256 (BSC.pack "Block number 256") 165 | > l <- writeToJournal k 666 (BSC.pack "Block number 666") 166 | > let x = Map.lookup 256 (oldBlocks l) 167 | > let y = Map.lookup 666 (oldBlocks l) 168 | > r1 <- readFromJournal l 256 169 | > r2 <- readFromJournal l 666 170 | > case r1 of 171 | > Just v -> BS.putStrLn v 172 | > _ -> return () 173 | 174 | 175 | -------------------------------------------------------------------------------- /refs/slidy/slidy.css: -------------------------------------------------------------------------------- 1 | /* slidy.css 2 | 3 | Copyright (c) 2005-2010 W3C (MIT, ERCIM, Keio), All Rights Reserved. 
4 | W3C liability, trademark, document use and software licensing 5 | rules apply, see: 6 | 7 | http://www.w3.org/Consortium/Legal/copyright-documents 8 | http://www.w3.org/Consortium/Legal/copyright-software 9 | */ 10 | body 11 | { 12 | margin: 0 0 0 0; 13 | padding: 0 0 0 0; 14 | width: 100%; 15 | height: 100%; 16 | color: black; 17 | background-color: white; 18 | font-family: "URW Palladio L", "Palatino Linotype", sans-serif; 19 | font-size: 14pt; 20 | } 21 | 22 | code 23 | { 24 | font-family: "DejaVu Sans Mono", monospace; 25 | } 26 | 27 | div.toolbar { 28 | position: fixed; z-index: 200; 29 | top: auto; bottom: 0; left: 0; right: 0; 30 | height: 1.2em; text-align: right; 31 | padding-left: 1em; 32 | padding-right: 1em; 33 | font-size: 60%; 34 | color: red; 35 | background-color: rgb(240,240,240); 36 | border-top: solid 1px rgb(180,180,180); 37 | } 38 | 39 | div.toolbar span.copyright { 40 | color: black; 41 | margin-left: 0.5em; 42 | } 43 | 44 | div.initial_prompt { 45 | position: absolute; 46 | z-index: 1000; 47 | bottom: 1.2em; 48 | width: 100%; 49 | background-color: rgb(200,200,200); 50 | opacity: 0.35; 51 | background-color: rgb(200,200,200, 0.35); 52 | cursor: pointer; 53 | } 54 | 55 | div.initial_prompt p.help { 56 | text-align: center; 57 | } 58 | 59 | div.initial_prompt p.close { 60 | text-align: right; 61 | font-style: italic; 62 | } 63 | 64 | div.slidy_toc { 65 | position: absolute; 66 | z-index: 300; 67 | width: 60%; 68 | max-width: 30em; 69 | height: 30em; 70 | overflow: auto; 71 | top: auto; 72 | right: auto; 73 | left: 4em; 74 | bottom: 4em; 75 | padding: 1em; 76 | background: rgb(240,240,240); 77 | border-style: solid; 78 | border-width: 2px; 79 | font-size: 60%; 80 | } 81 | 82 | div.slidy_toc .toc_heading { 83 | text-align: center; 84 | width: 100%; 85 | margin: 0; 86 | margin-bottom: 1em; 87 | border-bottom-style: solid; 88 | border-bottom-color: rgb(180,180,180); 89 | border-bottom-width: 1px; 90 | } 91 | 92 | div.slide { 93 | z-index: 20; 94 | margin: 0 0 0 0; 95 | padding-top: 0; 96 | padding-bottom: 0; 97 | padding-left: 20px; 98 | padding-right: 20px; 99 | border-width: 0; 100 | clear: both; 101 | top: 0; 102 | bottom: 0; 103 | left: 0; 104 | right: 0; 105 | line-height: 120%; 106 | background-color: transparent; 107 | } 108 | 109 | div.slide > div.figure { 110 | text-align: center 111 | } 112 | 113 | div.background { 114 | display: none; 115 | } 116 | 117 | div.handout { 118 | margin-left: 20px; 119 | margin-right: 20px; 120 | } 121 | 122 | div.slide.titlepage { 123 | text-align: center; 124 | } 125 | 126 | div.slide.titlepage h1 { 127 | padding-top: 10%; 128 | margin-right: 0; 129 | } 130 | 131 | div.slide h1 { 132 | padding-left: 0; 133 | padding-right: 20pt; 134 | padding-top: 4pt; 135 | padding-bottom: 4pt; 136 | margin-top: 0; 137 | margin-left: 0; 138 | margin-right: 60pt; 139 | margin-bottom: 0.5em; 140 | display: block; 141 | font-size: 160%; 142 | line-height: 1.2em; 143 | background: transparent; 144 | } 145 | 146 | div.toc { 147 | position: absolute; 148 | top: auto; 149 | bottom: 4em; 150 | left: 4em; 151 | right: auto; 152 | width: 60%; 153 | max-width: 30em; 154 | height: 30em; 155 | border: solid thin black; 156 | padding: 1em; 157 | background: rgb(240,240,240); 158 | color: black; 159 | z-index: 300; 160 | overflow: auto; 161 | display: block; 162 | visibility: visible; 163 | } 164 | 165 | div.toc-heading { 166 | width: 100%; 167 | border-bottom: solid 1px rgb(180,180,180); 168 | margin-bottom: 1em; 169 | text-align: center; 170 | } 171 | 172 | 
pre { 173 | font-size: 80%; 174 | font-weight: bold; 175 | line-height: 120%; 176 | padding-top: 0.2em; 177 | padding-bottom: 0.2em; 178 | padding-left: 1em; 179 | padding-right: 1em; 180 | border-style: solid; 181 | border-left-width: 1em; 182 | border-top-width: thin; 183 | border-right-width: thin; 184 | border-bottom-width: thin; 185 | border-color: #95ABD0; 186 | color: #00428C; 187 | background-color: #E4E5E7; 188 | } 189 | 190 | li pre { margin-left: 0; } 191 | 192 | blockquote { font-style: italic } 193 | 194 | img { background-color: transparent } 195 | 196 | p.copyright { font-size: smaller } 197 | 198 | .center { text-align: center } 199 | .footnote { font-size: smaller; margin-left: 2em; } 200 | 201 | a img { border-width: 0; border-style: none } 202 | 203 | a:visited { color: navy } 204 | a:link { color: navy } 205 | a:hover { color: red; text-decoration: underline } 206 | a:active { color: red; text-decoration: underline } 207 | 208 | a {text-decoration: none} 209 | .navbar a:link {color: white} 210 | .navbar a:visited {color: yellow} 211 | .navbar a:active {color: red} 212 | .navbar a:hover {color: red} 213 | 214 | ul { list-style-type: square; } 215 | ul ul { list-style-type: disc; } 216 | ul ul ul { list-style-type: circle; } 217 | ul ul ul ul { list-style-type: disc; } 218 | li { margin-left: 0.5em; margin-top: 0.5em; font-weight: bold } 219 | li li { font-size: 85%; font-weight: normal } 220 | li li li { font-size: 85%; font-weight: normal } 221 | strong { color: red; } 222 | li li strong { color: black; } 223 | /* pandoc's rules about when to insert paragraphs don't interact well 224 | with the requirement for blank lines around code blocks. Let's just 225 | neutralize the effects of p in bullets. */ 226 | li > p { margin: 0em; } 227 | 228 | div dt 229 | { 230 | margin-left: 0; 231 | margin-top: 1em; 232 | margin-bottom: 0.5em; 233 | font-weight: bold; 234 | } 235 | div dd 236 | { 237 | margin-left: 2em; 238 | margin-bottom: 0.5em; 239 | } 240 | 241 | 242 | p,pre,ul,ol,blockquote,h2,h3,h4,h5,h6,dl,table { 243 | margin-left: 1em; 244 | margin-right: 1em; 245 | } 246 | 247 | p.subhead { font-weight: bold; margin-top: 2em; } 248 | 249 | .smaller { font-size: smaller } 250 | .bigger { font-size: 130% } 251 | 252 | td,th { padding: 0.2em } 253 | 254 | ul { 255 | margin: 0.5em 1.5em 0.5em 1.5em; 256 | padding: 0; 257 | } 258 | 259 | ol { 260 | margin: 0.5em 1.5em 0.5em 1.5em; 261 | padding: 0; 262 | } 263 | 264 | ul { list-style-type: square; } 265 | ul ul { list-style-type: disc; } 266 | ul ul ul { list-style-type: circle; } 267 | ul ul ul ul { list-style-type: disc; } 268 | 269 | ul li { 270 | list-style: square; 271 | margin: 0.1em 0em 0.6em 0; 272 | padding: 0 0 0 0; 273 | line-height: 140%; 274 | } 275 | 276 | ol li { 277 | margin: 0.1em 0em 0.6em 1.5em; 278 | padding: 0 0 0 0px; 279 | line-height: 140%; 280 | list-style-type: decimal; 281 | } 282 | 283 | li ul li { 284 | font-size: 85%; 285 | font-style: normal; 286 | list-style-type: disc; 287 | background: transparent; 288 | padding: 0 0 0 0; 289 | } 290 | li li ul li { 291 | font-size: 85%; 292 | font-style: normal; 293 | list-style-type: circle; 294 | background: transparent; 295 | padding: 0 0 0 0; 296 | } 297 | li li li ul li { 298 | list-style-type: disc; 299 | background: transparent; 300 | padding: 0 0 0 0; 301 | } 302 | 303 | li ol li { 304 | list-style-type: decimal; 305 | } 306 | 307 | 308 | li li ol li { 309 | list-style-type: decimal; 310 | } 311 | 312 | /* 313 | setting class="outline on ol or ul makes it 
behave as an 314 | ouline list where blocklevel content in li elements is 315 | hidden by default and can be expanded or collapsed with 316 | mouse click. Set class="expand" on li to override default 317 | */ 318 | 319 | ol.outline li:hover { cursor: pointer } 320 | ol.outline li.nofold:hover { cursor: default } 321 | 322 | ul.outline li:hover { cursor: pointer } 323 | ul.outline li.nofold:hover { cursor: default } 324 | 325 | ol.outline { list-style:decimal; } 326 | ol.outline ol { list-style-type:lower-alpha } 327 | 328 | ol.outline li.nofold { 329 | padding: 0 0 0 20px; 330 | background: transparent url(../graphics/nofold-dim.gif) no-repeat 0px 0.5em; 331 | } 332 | ol.outline li.unfolded { 333 | padding: 0 0 0 20px; 334 | background: transparent url(../graphics/fold-dim.gif) no-repeat 0px 0.5em; 335 | } 336 | ol.outline li.folded { 337 | padding: 0 0 0 20px; 338 | background: transparent url(../graphics/unfold-dim.gif) no-repeat 0px 0.5em; 339 | } 340 | ol.outline li.unfolded:hover { 341 | padding: 0 0 0 20px; 342 | background: transparent url(../graphics/fold.gif) no-repeat 0px 0.5em; 343 | } 344 | ol.outline li.folded:hover { 345 | padding: 0 0 0 20px; 346 | background: transparent url(../graphics/unfold.gif) no-repeat 0px 0.5em; 347 | } 348 | 349 | ul.outline li.nofold { 350 | padding: 0 0 0 20px; 351 | background: transparent url(../graphics/nofold-dim.gif) no-repeat 0px 0.5em; 352 | } 353 | ul.outline li.unfolded { 354 | padding: 0 0 0 20px; 355 | background: transparent url(../graphics/fold-dim.gif) no-repeat 0px 0.5em; 356 | } 357 | ul.outline li.folded { 358 | padding: 0 0 0 20px; 359 | background: transparent url(../graphics/unfold-dim.gif) no-repeat 0px 0.5em; 360 | } 361 | ul.outline li.unfolded:hover { 362 | padding: 0 0 0 20px; 363 | background: transparent url(../graphics/fold.gif) no-repeat 0px 0.5em; 364 | } 365 | ul.outline li.folded:hover { 366 | padding: 0 0 0 20px; 367 | background: transparent url(../graphics/unfold.gif) no-repeat 0px 0.5em; 368 | } 369 | 370 | /* for slides with class "title" in table of contents */ 371 | a.titleslide { font-weight: bold; font-style: italic } 372 | 373 | /* 374 | hide images for work around for save as bug 375 | where browsers fail to save images used by CSS 376 | */ 377 | img.hidden { display: none; visibility: hidden } 378 | div.initial_prompt { display: none; visibility: hidden } 379 | 380 | div.slide { 381 | visibility: visible; 382 | position: inherit; 383 | } 384 | div.handout { 385 | border-top-style: solid; 386 | border-top-width: thin; 387 | border-top-color: black; 388 | } 389 | 390 | @media screen { 391 | .hidden { display: none; visibility: visible } 392 | 393 | div.slide.hidden { display: block; visibility: visible } 394 | div.handout.hidden { display: block; visibility: visible } 395 | div.background { display: none; visibility: hidden } 396 | body.single_slide div.initial_prompt { display: block; visibility: visible } 397 | body.single_slide div.background { display: block; visibility: visible } 398 | body.single_slide div.background.hidden { display: none; visibility: hidden } 399 | body.single_slide .invisible { visibility: hidden } 400 | body.single_slide .hidden { display: none; visibility: hidden } 401 | body.single_slide div.slide { position: absolute } 402 | body.single_slide div.handout { display: none; visibility: hidden } 403 | } 404 | 405 | @media print { 406 | .hidden { display: block; visibility: visible } 407 | 408 | div.slide pre { font-size: 60%; padding-left: 0.5em; } 409 | div.toolbar { display: 
none; visibility: hidden; } 410 | div.slidy_toc { display: none; visibility: hidden; } 411 | div.background { display: none; visibility: hidden; } 412 | div.slide { page-break-before: always } 413 | /* :first-child isn't reliable for print media */ 414 | div.slide.first-slide { page-break-before: avoid } 415 | } 416 | 417 | -------------------------------------------------------------------------------- /src/System/HaskDB/Transactions.lhs: -------------------------------------------------------------------------------- 1 | Transactions.lhs 2 | ================================================ 3 | 4 | > {-# LANGUAGE NoMonomorphismRestriction #-} 5 | > -- | File Transactions Module . 6 | > module System.HaskDB.Transactions ( 7 | > runTransaction 8 | > , retryTransaction 9 | > , readBlockT 10 | > , writeBlockT 11 | > , openTF 12 | > , closeTF 13 | > , sequencer 14 | > ) where 15 | > import Control.Concurrent 16 | > import qualified Data.ByteString as BS 17 | > import qualified System.HaskDB.FileHandling as FH 18 | > import System.HaskDB.FileHeader 19 | > import Data.Maybe 20 | > import System.IO hiding (withFile) 21 | > import System.HaskDB.Journal 22 | > import System.HaskDB.TransactionFH hiding(checkFailure) 23 | > import qualified Data.BloomFilter as BF 24 | > import qualified Data.Dequeue as DQ 25 | > import Data.BloomFilter.Hash (cheapHashes) 26 | > import Data.IORef 27 | > import Data.Unique 28 | > import Control.Applicative 29 | > import Control.Exception 30 | 31 | The main task is to capture the transaction into a datatype first. Here we are implementing a very basic version of transactions, so our transaction system provides only two types of operations to be performed on the file. 32 | 33 | * **ReadBlock** is to read the data from the given block number. 34 | * **WriteBlock** is to write the given data to the block number provided. 35 | 36 | We could also think of adding operations like append block, modify block, etc., but to keep it simple we only support these two basic operations. 37 | 38 | Now let's look at the data definition of the File Transaction (FT) data type. 39 | 40 | 41 | > -- | Transaction DataType 42 | > data FT a = 43 | > Done a | -- ^ Any value in the FT monad will be captured in Done a. 44 | > ReadBlock BlockNumber (BS.ByteString -> FT a) | -- ^ FT a here represents 45 | > -- rest of the computation. This follows from the continuation passing style. 46 | > WriteBlock BlockNumber BS.ByteString (FT a) -- ^ FT a here is similar to 47 | > -- above to have continuations. 48 | 49 | Don't be scared by the types of ReadBlock and WriteBlock. We will see later how they help in actually passing the continuations. 50 | 51 | Let's see the monad definition of the FT datatype. 52 | 53 | > -- | Monad Definition for the Transaction. 54 | > instance Monad FT where 55 | > return = Done 56 | > m >>= f = case m of 57 | > Done a -> f a 58 | > ReadBlock bn c -> ReadBlock bn (\i -> c i >>= f) 59 | > WriteBlock bn x c -> WriteBlock bn x (c >>= f) 60 | 61 | Here we will see how the types of ReadBlock and WriteBlock actually help in our continuation passing style of programming. Let's see a simple example of the interface our implementation provides. 62 | 63 | Consider the famous banking example for transactions. We want to transfer an amount x from account A to account B. 64 | 65 | Let's assume that the fund information for A and B is stored in the same file at block numbers a and b respectively. 66 | 67 | Here is a function which deposits amount x to the given account.
68 | 69 | > deposit a x = do 70 | > block <- ReadBlock a return 71 | > WriteBlock a (increase block x) (return ()) 72 | > where 73 | > increase bs x = undefined -- amount bs = amount bs + x 74 | 75 | Let's see how this is translated to explicit notation without do notation. 76 | 77 | > -- ReadBlock a return >>= (\ block -> WriteBlock a (increase block x) (return ())) 78 | > -- ReadBlock a (\ i -> Done i >>= 79 | > --     (\ block -> WriteBlock a (increase block x) (return ()))) 80 | > -- ReadBlock a (\ block -> WriteBlock a (increase block x) (return ())) 81 | 82 | It got transformed to exactly what we wanted. 83 | 84 | It was a little frustrating to write return while writing ReadBlock and WriteBlock, so let's define a few helpers to avoid the repetition. 85 | 86 | > -- | readBlockT to be used inside the FT Monad 87 | > readBlockT :: BlockNumber -> FT BS.ByteString 88 | > readBlockT = flip ReadBlock return 89 | > -- | writeBlockT to be used inside the FT Monad 90 | > writeBlockT :: BlockNumber -> BS.ByteString -> FT () 91 | > writeBlockT v x = WriteBlock v x $ return () 92 | 93 | Now we want to actually perform the transactions satisfying all the ACID guarantees. So we need to write a function to convert our transactions from the FT monad to the IO monad and perform them. 94 | 95 | According to the semantics of a transaction, a transaction can either fail or succeed. So we should provide at least two types of functions to run a transaction, which are as follows: 96 | 97 | > -- | Runs the given transaction on the file. 98 | > -- Transaction may fail in which case it returns Nothing. 99 | > runTransaction :: FT a -- ^ FileTransaction to be performed 100 | > -> TFile -- ^ File on which this transaction is to be performed 101 | > -> IO (Maybe a) 102 | > runTransaction = runT Nothing False 103 | > 104 | > -- | Runs the transaction on the file. 105 | > -- If transaction fails then repeats it with higher priority. 106 | > retryTransaction :: FT a -- ^ FileTransaction to be performed 107 | > -> TFile -- ^ File on which transaction is performed 108 | > -> IO a 109 | > retryTransaction ft tFile = fromJust <$> runT Nothing True ft tFile 110 | 111 | At this point, before implementing anything else, we are interested in how we will actually be using them. Here I will also introduce you to the power of composing two transactions and running them as one. Let's come back to our banking example. 112 | 113 | > transfer a b x = do 114 | > deposit a (-x) 115 | > deposit b x 116 | 117 | Here is the function to withdraw amount x from account A and deposit it to account B. We have implemented very loose semantics here, not checking whether A's balance goes below 0, etc. I just wanted to show the power of composing functions. Now we can just run runTransaction on transfer to run this transaction. The semantics of runTransaction automatically takes care of all the possible failures and rolls back in case of transaction failure. 118 | 119 | Now comes the core function of our implementation, which actually performs all the actions. 120 | 121 | > runT :: Maybe Integer -- Nothing if transaction never failed else Just id 122 | > -> Bool -- True if the transaction is to be retried in case of failure 123 | > -> FT a -- Transaction 124 | > -> TFile -- The File on which to run this transaction 125 | > -> IO (Maybe a) 126 | > 127 | > runT failure retry ft tFile = do 128 | > -- Add the transaction to the transaction queue and get the current 129 | > -- fileversion. This has to be removed after commit is performed.
130 | > (tid,fileVersion) <- withSynch (FH.synchVar $ fHandle tFile) 131 | > (addToTransactionQ tFile) 132 | > let transFile = newTransactionFile tid 133 | > -- Performs the transaction on the journal file and commits. If commit 134 | > -- succeeded then return Just output else Nothing 135 | > maybeOut <- withFile transFile $ runAndCommit fileVersion tFile ft 136 | > atomicModifyIORef (transactions tFile) $ \q -> (deleteFromQueue q tid, ()) 137 | > if retry then retryIfFailed failure maybeOut fileVersion tid ft tFile 138 | > else return maybeOut 139 | > where 140 | > retryIfFailed :: Maybe Integer -> Maybe a -> FileVersion -> Integer 141 | > -> FT a -> TFile -> IO (Maybe a) 142 | > retryIfFailed Nothing out@(Just a) _ _ _ _ = return out 143 | > retryIfFailed (Just tid) out@(Just a) _ _ _ tFile = do 144 | > atomicModifyIORef (failedQueue tFile) 145 | > (\q -> (deleteFromQueue q tid,())) 146 | > return out 147 | > retryIfFailed Nothing Nothing fileVersion tid ft tFile = do 148 | > atomicModifyIORef (failedQueue tFile) 149 | > (\q -> (DQ.pushBack q $ (tid,fileVersion),())) 150 | > runT (Just tid) True ft tFile 151 | > retryIfFailed (Just tid) Nothing fileVersion _ ft tFile = 152 | > runT (Just tid) True ft tFile 153 | > 154 | > runAndCommit :: FileVersion -> TFile -> FT a -> FH.FHandle -> IO (Maybe a) 155 | > runAndCommit fv tFile ft fh = do 156 | > out <- trans ft tFile $ Transaction (BlockList [] 8 fh) ReadOnly 157 | > commit fv tFile out 158 | > 159 | > newTransactionFile :: Integer -> String 160 | > newTransactionFile tid = (show tid) ++ ".trans" 161 | > 162 | > addToTransactionQ :: TFile -> IO (Integer,FileVersion) 163 | > addToTransactionQ fh = do 164 | > fileVersion <- getFileVersion $ fHandle fh 165 | > tid <- maybe (atomicModifyIORef (FH.journalId $ fHandle fh) (\a-> (a+1,a))) return failure 166 | > atomicModifyIORef (transactions fh) 167 | > (\q -> (DQ.pushBack q $ (tid,fileVersion) , ())) 168 | > return (tid,fileVersion) 169 | > 170 | > deleteFromQueue :: DQ.BankersDequeue (Integer,a) -> Integer 171 | > -> DQ.BankersDequeue (Integer,a) 172 | > deleteFromQueue q id = do 173 | > let (a,newQ) = DQ.popFront q 174 | > case a of 175 | > Nothing -> newQ 176 | > Just e@(qid,bl) -> if qid /= id 177 | > then DQ.pushFront (deleteFromQueue newQ id) e 178 | > else newQ 179 | > 180 | > trans :: FT a -> TFile -> Transaction -> IO (a,Transaction) 181 | > trans (Done a) _ d = do 182 | > return (a,d) 183 | > trans (ReadBlock bn c) fh d = do 184 | > val <- readBlockJ fh bn d 185 | > rblcks <- addBlock (fromIntegral bn) (rBlocks d) 186 | > trans (c val) fh $ d {rBlocks = rblcks} 187 | > -- Experiment with the hashfunction and number of bits. 188 | > trans (WriteBlock bn x c) fh d = do 189 | > rw <- rToRw d (BF.emptyB (cheapHashes 20) 4096) (newJournal (fHandle fh)) 190 | > let newBl = BF.insertB bn (bloom $ tType rw) 191 | > j <- writeToJournal (journal $ tType rw) bn x 192 | > trans c fh (rw {tType = (tType rw) {bloom = newBl,journal = j}} ) 193 | 194 | All the journals of the transactions get aggregated over time which might result in poor read performances over time. So we need to actually checkpoint the commited journals back to the database file. 195 | 196 | > sequencer :: TFile -> IO () 197 | > sequencer fh = do 198 | > b <- readIORef $ jQueue fh 199 | > if DQ.null b then yield else do 200 | > let j = getJournal . 
fromJust $ DQ.first b 201 | > replayJournal j 202 | > FH.flushBuffer (fHandle fh) 203 | > popFromJournalQueue j fh 204 | > sequencer fh 205 | > where 206 | > popFromJournalQueue j fh = do 207 | > tq <- readIORef $ transactions fh 208 | > if checkToDelete (journalID j) tq then 209 | > atomicModifyIORef (jQueue fh) 210 | > (\q -> (snd $ DQ.popFront q , ())) 211 | > else do 212 | > yield 213 | > popFromJournalQueue j fh 214 | > checkToDelete jid tq = do 215 | > let (front , _) = DQ.popFront tq 216 | > case front of 217 | > Nothing -> True 218 | > Just (id,fv) -> fv >= jid 219 | > 220 | > 221 | 222 | Helpers : 223 | 224 | > commit :: FileVersion -> TFile -> (a,Transaction) -> IO (Maybe a) 225 | > commit oldFV fh (output,trans) = do 226 | > -- synch is used to prevent 2 commits from interleaving 227 | > out <- withSynch (commitSynch fh) 228 | > (do 229 | > newFV <- getFileVersion $ fHandle fh 230 | > print $ "Old fv " ++ (show oldFV) 231 | > cf <- checkSuccess oldFV newFV fh (rBlocks trans) 232 | > if not cf then return Nothing else do 233 | > case trans of 234 | > Transaction _ (ReadWrite bl jr) -> do 235 | > atomicModifyIORef (jQueue fh) 236 | > (\q -> (DQ.pushBack q $ JInfo jr bl (newFV + 1),())) 237 | > commitJournal jr 238 | > _ -> return () 239 | > return $ Just output ) 240 | > case trans of 241 | > Transaction _ t@(ReadWrite _ jl ) -> do FH.flushBuffer $ jHandle jl 242 | > FH.flushBuffer $ hHandle jl 243 | > otherwise -> return () 244 | > return out 245 | > 246 | > withSynch :: MVar () -> IO b -> IO b 247 | > withSynch synchVar = bracket_ (takeMVar synchVar) (putMVar synchVar ()) 248 | > withFile :: FilePath -> (FH.FHandle -> IO a) -> IO a 249 | > -- Blocksize taken to be 4096 Bytes 250 | > withFile fp = bracket (FH.openF fp ReadWriteMode 4096) FH.closeF 251 | > 252 | > rToRw :: Transaction -> BF.Bloom BlockNumber -> IO Journal -> IO Transaction 253 | > rToRw t@(Transaction rb ReadOnly) bl jl = do 254 | > j <- jl 255 | > return $ Transaction rb (ReadWrite bl j) 256 | > rToRw t _ _ = return t 257 | > tMap :: Transaction -> (BF.Bloom BlockNumber -> BF.Bloom BlockNumber) 258 | > -> (Journal -> Journal) -> Transaction 259 | > tMap t@(Transaction rb ReadOnly) _ _ = t 260 | > tMap (Transaction rb (ReadWrite bl jl)) fbl fjl = 261 | > Transaction rb (ReadWrite (fbl bl) (fjl jl)) 262 | -------------------------------------------------------------------------------- /refs/disk_based_transactions.md: -------------------------------------------------------------------------------- 1 | % Concurrent Disk Based Transactions in Haskell 2 | % Satvik Chauhan and Pankaj More 3 | % April 16 , 2012 4 | 5 | # Shared Resource Problem 6 | 7 | 13 | * **Problem** : Share a resource between multiple concurrent threads. 14 | * Results in several problems. 15 | * Lost update 16 |
 17 | +----------P1-------------+-----+------------P2------------+
 18 | |                         +  1  + w <- read(A)             |
 19 | | u <- read(A)            +  2  +                          |
 20 | | write(A,u+100)          +  3  +                          |
 21 | |                         +  4  + write(A,w+100)           |
 22 | +-------------------------+-----+--------------------------+
 23 | 
24 | * Incorrect summary problem 25 |
 26 | +----------P1-------------+-----+------------P2------------+
 27 | |                         +  1  + w <- read(A)             |
 28 | |                         +  2  + write(A,w-100)           |
 29 | | u1 <- read(A)           +  3  +                          |
 30 | | u2 <- read(B)           +  4  +                          |
 31 | | sum = u1 + u2           +  5  +                          |
 32 | |                         +  6  + x <- read(B)             |
 33 | |                         +  7  + write(B,x+100)           |
 34 | +-------------------------+-----+--------------------------+
 35 | 
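* A minimal, self-contained sketch of the lost update above (illustrative only, not part of the library): two unsynchronised read-modify-write sequences on a shared `IORef` can drop a deposit.

~~~ {.haskell}
import Control.Concurrent
import Data.IORef

main :: IO ()
main = do
    acc  <- newIORef (0 :: Int)
    done <- newEmptyMVar
    let depositRacy = do
            u <- readIORef acc        -- u <- read(A)
            threadDelay 1000          -- widen the race window
            writeIORef acc (u + 100)  -- write(A,u+100)
    _ <- forkIO (depositRacy >> putMVar done ())
    depositRacy
    takeMVar done
    readIORef acc >>= print           -- may print 100 instead of 200
~~~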
36 | 37 | # Shared Resource Problem (Cont) 38 | 39 | * **Solution** : Locks !! 40 | * Problem with Locks 41 | * Race Conditions if locks are forgotten. 42 | * Deadlocks from inconsistent lock ordering. 43 | * Uncaught exceptions might result in any of the above problems. 44 | * Coarse Locks hurt performance. 45 | * Locks don't compose. 46 | 47 | # Shared Resource Problem (Cont) 48 | 49 | * Two phase locking 50 | * Any transaction first must acquire locks for all the shared resources. 51 | * Perform the operations on the shared resources. 52 | * Release all the locks. 53 | * Still deadlock problem. 54 |
 55 | +----------P1-------------+-----+------------P2------------+
 56 | |                         +  1  + Acquire(A)               |
 57 | | Acquire(B)              +  2  +                          |
 58 | |                         +  3  + Acquire(B)               |
 59 | | Acquire(A)              +  4  +                          |
 60 | | Some operations         +  5  +                          |
 61 | | Release(B)              +  6  +                          |
 62 | | Release(A)              +  7  +                          |
 63 | |                         +  8  + Some operations          |
 64 | |                         +  9  + Release(A)               |
 65 | |                         +  10 + Release(B)               |
 66 | +-------------------------+-----+--------------------------+
 67 | 
68 | 69 | * Can prevent deadlocks by acquiring locks in a certain fixed order. 70 | * Still hurts performance. 71 | 72 | 73 | # Transactions 74 | 75 | * An optimistic way of managing shared resources. 76 | * A set of operations which are performed on a resource either fully or not at all. 77 | * Helps to maintain a consistent view of the resource. 78 |
 79 | +----------T1-------------+-----+------------T2------------+
 80 | |                         +  1  + w <- read(A)             |
 81 | | u1 <- read(A)           +  2  +                          |
 82 | | write(A,u1+100)         +  3  +                          |
 83 | | commit                  +  4  +                          |
 84 | |                         +  5  + write(A,w-100)           |
 85 | |                         +  6  + x <- read(B)             |
 86 | |                         +  7  + write(B,x+100)           |
 87 | |                         +  8  + commit Fails             |
 88 | |                         +  9  + rollback                 |
 89 | +-------------------------+-----+--------------------------+
 90 | 
91 | * When T2 is rolled back, the memory state is the same as if T2 had never executed. 92 | 93 | # Transactions (Cont) 94 | 95 | * **Atomicity** 96 | * Transaction either commits or fails 97 | * **Consistency** 98 | * Resource is in a consistent state after a transaction commits. 99 | * **Isolation** 100 | * Unaware of the effects of concurrently running transactions. 101 | * **Durability** 102 | * Once a transaction commits, changes must persist. 103 | 104 | # Software Transactional Memory (STM) 105 | 106 | * A new way of programming on multicore systems. 107 | * An optimistic way of running transactions. 108 | * Allows all transactions to run simultaneously. 109 | * Transactions perform changes to their own local buffer. 110 | * At the time of commit, decide success or failure. 111 | * Success : The changes become simultaneously visible to other threads. 112 | * Failure : No changes made to the memory. 113 | * Compose well. 114 | 115 | # Disk Based Transactions 116 | 117 | * Schedule 118 | * A sequence of instructions that specify the chronological 119 | order in which instructions of concurrent transactions are executed 120 | * A schedule for a set of transactions must consist of all instructions 121 | of those transactions 122 | * A schedule must preserve the order in which the instructions appear in each 123 | individual transaction. 124 | 125 | # Serial Schedule 126 | 127 | * A serial schedule in which T1 is followed by T2 128 |
129 | +----------T1-------------+-----+------------T2------------+
130 | | read(A)                 +  1  +                          |
131 | | A <- A * 2              +  2  +                          |
132 | | write(A)                +  3  +                          |
133 | |                         +  4  + read(A)                  |
134 | |                         +  5  + A <- A - 50              |
135 | |                         +  6  + write(A)                 |
136 | |                         +  7  + read(B)                  |
137 | |                         +  8  + B <- B + 50              |
138 | |                         +  9  + write(B)                 |
139 | +-------------------------+-----+--------------------------+
140 | 
141 | * A serial schedule in which T2 is followed by T1 142 |
143 | +----------T1-------------+-----+------------T2------------+
144 | |                         +  1  + read(A)                  |
145 | |                         +  2  + A <- A - 50              |
146 | |                         +  3  + write(A)                 |
147 | |                         +  4  + read(B)                  |
148 | |                         +  5  + B <- B + 50              |
149 | |                         +  6  + write(B)                 |
150 | | read(A)                 +  7  +                          |
151 | | A <- A * 2              +  8  +                          |
152 | | write(A)                +  9  +                          |
153 | +-------------------------+-----+--------------------------+
154 | 
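* As a quick sketch (plain `IORef`s, not the transaction library), both serial orders above are valid executions; they merely produce different final states.

~~~ {.haskell}
import Control.Applicative ((<$>), (<*>))
import Data.IORef

t1, t2 :: IORef Int -> IORef Int -> IO ()
t1 a _ = modifyIORef a (* 2)              -- A <- A * 2
t2 a b = modifyIORef a (subtract 50)      -- A <- A - 50
      >> modifyIORef b (+ 50)             -- B <- B + 50

main :: IO ()
main = do
    (a, b) <- (,) <$> newIORef 100 <*> newIORef 100
    t1 a b >> t2 a b                      -- T1 followed by T2
    (,) <$> readIORef a <*> readIORef b >>= print
    -- prints (150,150); T2 followed by T1 would give (100,150)
~~~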
155 | 156 | # Why interleave 157 | 158 | * In a serial schedule, if a transaction waits for an I/O operation to complete, 159 | CPU cycles are wasted 160 | * Other transactions may also be in line waiting for the completion of a 161 | transaction. (Hint: Convoy effect) 162 | * For these reasons, serial schedules are generally considered unacceptable in 163 | practice. 164 | * Interleaving could 165 | * utilise idle CPU cycles 166 | * improve "perceived performance" 167 | 168 | 169 | 170 | # Conflicting Instructions 171 | 172 | * Instructions Ii and Ij of transactions Ti and Tj respectively, conflict 173 | if and only if there exists some item Q accessed by both Ii and Ij, 174 | and at least one of these instructions wrote Q. 175 | * Intuitively, a conflict between Ii and Ij forces a (logical) temporal order between them. 176 | * If Ii and Ij are consecutive in a schedule and they do not conflict, their results would remain the same even if they had been interchanged in the schedule. 177 | 178 | # Serializability 179 | 180 | * If a schedule S can be transformed into a schedule S´ by a series of 181 | swaps of non-conflicting instructions, we say that S and S´ are 182 | equivalent 183 | * We say that a schedule S is serializable if it is equivalent to a serial schedule 184 | * Serializability provides us with *formalism* for valid interleaving 185 | * All possible equivalent schedules are candidates for valid concurrent execution 186 | 187 | # A Simplified View of Transactions 188 | 189 | * We ignore operations other than read and write instructions 190 | * We assume that transactions may perform arbitrary computations on data in local buffers in between reads 191 | and writes 192 | * Our simplified schedules consist of only read and write instructions 193 | 194 | # Lock Based Implementations 195 | 196 | * Global lock on the database file 197 | * Multiple readers can *share* locks in the absence of a writer 198 | * Writer requires *exclusive* locks 199 | * Good for read-heavy applications 200 | * Inefficient in case of multiple writers 201 | 202 | 203 | # Why Haskell? 204 | 205 | * Haskell's expressive power can improve productivity 206 | * Small language core provides big flexibility 207 | * Code can be very concise, speeding development 208 | * Get the best of both worlds from compiled and interpreted languages 209 | 210 | * Haskell makes code easier to understand and maintain 211 | * Can dive into complex libraries and understand *what* the code 212 | is doing 213 | 214 | * Haskell can increase the robustness of systems 215 | * Strong typing catches many bugs at compile time 216 | * Functional code permits better testing methodologies 217 | * Can parallelize non-concurrent code without changing semantics 218 | * Concurrent programming abstractions resistant to data races 219 | 220 | * Lots of compiler optimizations 221 | * Fusion, inlining, etc. 222 | 223 | # Haskell is a *pure* functional language 224 | 225 | * Unlike variables in imperative languages, Haskell bindings are 226 | * *immutable* - can only bind a symbol once in a given scope
227 | (We still call bound symbols "variables" though)
228 | 229 | ~~~ {.haskell} 230 | x = 5 231 | x = 6 -- error, cannot re-bind x 232 | ~~~ 233 | 234 | * *order-independent* - order of bindings in source code does not 235 | matter 236 | 237 | * *lazy* - definitions of symbols are evaluated only when needed 238 | 239 | ~~~ {.haskell} 240 | evens = map (*2) [1..] -- infinite list of even numbers 241 | main = print (take 50 evens) -- prints first 50 even numbers 242 | ~~~ 243 | 244 | * *recursive* 245 | 246 | # Haskell , Transactions and STM 247 | 248 | * Transactions Using Locks 249 | 250 | ~~~ {.haskell} 251 | transfer :: Account -> Account -> Int -> IO () 252 | transfer to from amount = do 253 | acquire fromLock 254 | acquire toLock 255 | withdraw from amount 256 | deposit to amount 257 | release fromLock 258 | release toLock 259 | ~~~ 260 | 261 | * Easy to result into deadlocks. 262 | 263 | # Haskell , Transactions and STM (Cont) 264 | 265 | * STM 266 | 267 | ~~~ {.haskell} 268 | withdraw :: Account -> Int -> STM () 269 | withdraw acc amount = do 270 | bal <- readTVar acc 271 | writeTVar acc (bal - amount) 272 | 273 | deposit :: Account -> Int -> STM () 274 | deposit acc amount = withdraw acc (-amount) 275 | 276 | transfer :: Account -> Account -> Int -> IO () 277 | transfer to from amount = atomically (do 278 | withdraw from amount 279 | deposit to amount) 280 | ~~~ 281 | 282 | * No deadlocks 283 | * Easy to compose 284 | * Type system prevents any IO operations to be performed inside a transaction. 285 | 286 | # Disk Based Transactions 287 | 288 | * Assumptions 289 | * Fsync syscall works as advertised 290 | * The data it reads is *exactly* the same that it previously wrote 291 | * Writing to disk adheres to block boundaries 292 | 293 | # Algorithm 294 | 295 | * A single transaction file on which all transactions are done 296 | * A transaction can be either ReadOnly or ReadWrite 297 | * Each read-write transaction has its own *log* 298 | * read-only transactions don't need any *logs* 299 | * A transaction first writes to its own *log* 300 | * Committing a transaction increments the file version of the transaction file 301 | * Reads from the latest valid *log* 302 | 303 | # Commit workflow 304 | 305 | ![](1.png) 306 | 307 | # Commit workflow (Cont) 308 | 309 | ![](2.png) 310 | 311 | # Commit workflow (Cont) 312 | 313 | ![](3.png) 314 | 315 | # Sequencer 316 | 317 | * Number of *logs* can grow very fast 318 | * Checkpoint performs the following operations 319 | * Merges all the data from *logs* onto the transaction file 320 | * Removes the *log* reference from the queue 321 | * Deletes the corresponding *log* file from disk 322 | * Sequencer can be called by the programmer at suitable time 323 | 324 | # ACIDity 325 | * **Atomicity** 326 | * Transaction either commits or fails 327 | * **Consistency** 328 | * The set of *valid logs* + transaction file is always consistent 329 | * **Isolation** 330 | * A transaction cannot see the effects of other running transactions 331 | * **Durability** 332 | * Once a transaction commits , fsync must ensure that the data is actually written onto the disk. 
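* A miniature model of the commit rule from the Algorithm slides (the names `Version`, `writesSince` and `tryCommit` are illustrative, not the library API): a transaction may commit only if no block it read was overwritten after the version it started at.

~~~ {.haskell}
import Data.List (intersect)

type Version = Integer
type Block   = Int

-- Blocks written by each version committed after startV.
writesSince :: Version -> [(Version, [Block])] -> [Block]
writesSince startV history =
    concat [ ws | (v, ws) <- history, v > startV ]

-- Commit succeeds only if the read set is disjoint from the
-- blocks written since the transaction began.
tryCommit :: Version -> [Block] -> [(Version, [Block])] -> Bool
tryCommit startV readSet history =
    null (readSet `intersect` writesSince startV history)

main :: IO ()
main = do
    let history = [(1, [5]), (2, [9])]
    print (tryCommit 0 [5] history)  -- False: block 5 changed after v0
    print (tryCommit 1 [5] history)  -- True: only block 9 changed after v1
~~~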
333 | 334 | # Performance Trade-off 335 | 336 | * There is a trade-off between average read performance and average write performance 337 | * Keep minimum number of *logs* for maximum read performance 338 | * Checkpoint as frequently as possible 339 | * For maximum write performance , delay checkpoints as far as possible 340 | * The optimum checkpoint frequency depends on the particular performance requirements of the application 341 | 342 | 343 | # Implementation Details 344 | 345 | * We have captured transaction in a monad. 346 | 347 | ~~~ {.haskell} 348 | -- Transactions . lhs 349 | -- | Transaction Datatype 350 | data FT a = 351 | Done a | 352 | ReadBlock BlockNumber ( ByteString -> FT a ) | 353 | WriteBlock BlockNumber ByteString ( FT a ) 354 | ~~~ 355 | 356 | * Allow 2 operations. 357 | * Read data from a block. 358 | * Write data to a block. 359 | 360 | * Monad Instance 361 | 362 | ~~~ {.haskell} 363 | -- Transactions . lhs 364 | -- | Monad Definition for the Transaction . 365 | instance Monad FT where 366 | return = Done 367 | m >>= f = case m of 368 | Done a -> f a 369 | ReadBlock bn c -> ReadBlock bn (\ i -> c i >>= f ) 370 | WriteBlock bn x c -> WriteBlock bn x ( c >>= f ) 371 | ~~~ 372 | 373 | 374 | # Implementation Details (Cont) 375 | 376 | * Example Usage: 377 | 378 | ~~~ {.haskell} 379 | deposit :: BlockNumber -> ByteString -> FT () 380 | deposit a x = do 381 | amount <- ReadBlock a return 382 | WriteBlock a (amount + x) (return ()) 383 | ~~~ 384 | 385 | * Explicit notation in terms of bind and return 386 | 387 | ~~~ {.haskell} 388 | ReadBlock a return >>= (\amount -> WriteBlock a (amount +x) (return())) 389 | ReadBlock a (\i -> return i >>= \amount -> WriteBlock a (amount + x) (return())) 390 | ReadBlock a (\i -> Done i >>= \amount -> WriteBlock a (amount + x) (return())) 391 | ReadBlock a (\i -> WriteBlock a (i + x) (return())) 392 | ~~~ 393 | 394 | # Implementation Details (Cont) 395 | 396 | * Defining monad instance has 2 advantages 397 | * Type Checker prevents the user from performing any other IO operations. 398 | 399 | ~~~ {.haskell} 400 | deposit :: BlockNumber -> ByteString -> FT () 401 | deposit a x = do 402 | amount <- ReadBlock a return 403 | print amount -- Not allowed by type checker 404 | WriteBlock a (amount + x) (return ()) 405 | ~~~ 406 | 407 | * Easy composition 408 | 409 | ~~~ {.haskell} 410 | transfer :: BlockNumber -> BlockNumber -> ByteString -> FT () 411 | transfer a b x = deposit a (-x) >> deposit b x 412 | ~~~ 413 | 414 | ~~~ {.haskell} 415 | transfer :: BlockNumber -> BlockNumber -> ByteString -> FT () 416 | transfer a b x = do 417 | deposit a (-x) 418 | deposit b x 419 | ~~~ 420 | 421 | # Implementation Details (Cont) 422 | 423 | * Running a transaction 424 | 425 | ~~~ {.haskell} 426 | runTransaction :: FT a -> TFile -> IO a 427 | runTransaction (transfer a b 100) 428 | retryTransaction :: FT a -> TFile -> IO a 429 | retryTransaction (transfer a b 100) 430 | ~~~ 431 | 432 | * runTransaction ensures all the properties of a transaction. 433 | * Transaction may fail while using runTransaction. 434 | * retryTransaction ensures that transaction is repeated until it succeeds. 435 | * Too many Journals might result in poor read performance. 436 | * Sequencer copies the data to the main file and remove the intermediate journal files. 437 | 438 | ~~~ {.haskell} 439 | sequencer :: TFile -> IO () 440 | ~~~ 441 | 442 | # Testing 443 | 444 | * Ensuring robustness and quality of the implementation. 445 | * QuickCheck : Tool for type based testing. 
446 | * Invariants checked on random testcases generated from the function type. 447 | 448 | ~~~ {.haskell} 449 | qsort :: Ord a => [a] -> [a] 450 | qsort (x:xs) = qsort lhs ++ [x] ++ qsort rhs 451 | where lhs = filter (< x) xs 452 | rhs = filter (>= x) xs 453 | -- Quickcheck property to check 454 | prop_sortcheck xs = qsort (qsort xs) == qsort xs 455 | ~~~ 456 | 457 | ~~~ {.haskell} 458 | ghci> quickCheck (prop_sortcheck :: [Integer] -> Bool) 459 | *** Failed! Exception: 'qsort.hs:(3,1)-(5,32): Non-exhaustive patterns in function qsort' (after 1 test): 460 | [] 461 | ~~~ 462 | 463 | * Correcting the code 464 | 465 | ~~~ {.haskell} 466 | qsort [] = [] 467 | ~~~ 468 | 469 | ~~~ {.haskell} 470 | ghci> quickCheck (prop_sortcheck :: [Integer] -> Bool) 471 | +++ OK, passed 100 tests. 472 | ~~~ 473 | 474 | # Testing (Cont) 475 | 476 | * Keeping things pure. 477 | * Easier to reason about code with no side effects. 478 | * Simulate File system without actually performing IO for testing purposes. 479 | * Different types for actual code and testing code. 480 | * Type Classes. 481 | 482 | ~~~ {.haskell} 483 | -- Backend.hs 484 | class (Show a,Monad m) => Backend b m a | b -> m where 485 | data Handle :: * -> * 486 | type BlockNumber :: * 487 | open :: FilePath -> Mode -> m (Handle b) 488 | close :: Handle b -> m () 489 | readBlock ::(Eq BlockNumber)=>Handle b->BlockNumber->m a 490 | writeBlock ::(Eq BlockNumber)=>Handle b->BlockNumber->a->m () 491 | sync :: Handle b -> m () 492 | ~~~ 493 | 494 | # Testing (Cont) 495 | 496 | * File System being simulated in memory. 497 | 498 | ~~~ {.haskell} 499 | -- Backend.hs 500 | data TestDisk a = TestDisk { 501 | disk :: M.Map FilePath (File a) 502 | , buffers :: M.Map FilePath (File a) 503 | , bufferSize :: Int -- Current buffer Size 504 | , openFiles :: M.Map FilePath Mode 505 | } deriving (Eq,Show) 506 | data File a = File { 507 | blocks :: M.Map Int a 508 | , size :: Int 509 | } deriving (Eq, Show) 510 | 511 | data Mode = ReadWrite | Read | Write deriving (Eq,Show) 512 | ~~~ 513 | 514 | 515 | # Testing (Cont) 516 | 517 | * Instance of the Backend Class . Pure and no side effects. 
518 | 519 | ~~~ {.haskell} 520 | instance Show a => Backend (TestDisk a) (State (TestDisk a)) a where 521 | data Handle (TestDisk a) = Handle FilePath 522 | type BlockNumber = Int 523 | open fp m = do 524 | disk <- get 525 | put $ openFile disk fp m 526 | return $ Handle fp 527 | close (Handle fp) = do 528 | disk <- get 529 | put $ closeFile disk fp 530 | readBlock (Handle fp) bn = do 531 | t <- get 532 | case readB t fp bn buffers of 533 | Nothing -> error "Block Not Found" 534 | Just a -> return a 535 | writeBlock (Handle fp) bn a = do 536 | t <- get 537 | put $ writeB t fp bn a 538 | sync (Handle fp) = do 539 | t <- get 540 | put $ flushBuffer t fp 541 | ~~~ 542 | 543 | 544 | 545 | 546 | 547 | -------------------------------------------------------------------------------- /refs/report.tex: -------------------------------------------------------------------------------- 1 | \documentclass[11pt,a4paper]{article} 2 | \usepackage{graphicx} 3 | \usepackage{alltt} 4 | \usepackage{color} 5 | \usepackage{fancyhdr} 6 | \usepackage{listings} 7 | \usepackage{multicol} 8 | \usepackage{hyperref} 9 | \pagestyle{fancy} 10 | \definecolor{gray_ulisses}{gray}{0.55} 11 | \definecolor{lightgray}{gray}{0.95} 12 | \definecolor{castanho_ulisses}{rgb}{0.71,0.33,0.14} 13 | \definecolor{preto_ulisses}{rgb}{0.41,0.20,0.04} 14 | \definecolor{green_ulises}{rgb}{0.2,0.75,0} 15 | \lstdefinelanguage{HaskellU} 16 | { 17 | basicstyle=\ttfamily\scriptsize, 18 | backgroundcolor=\color{lightgray}, 19 | %frameshape={RYRYNYYYY}{yny}{yny}{RYRYNYYYY}, %contornos... muito nice... 20 | sensitive=true, 21 | morecomment=[l][\color{gray_ulisses}\scriptsize]{--}, 22 | morecomment=[s][\color{gray_ulisses}\scriptsize]{\{-}{-\}}, 23 | morestring=[b]", 24 | stringstyle=\color{red}, 25 | showstringspaces=false, 26 | numbers=left, 27 | firstnumber=last, 28 | numberstyle=\tiny, 29 | numberblanklines=true, 30 | showspaces=false, 31 | showtabs=false, 32 | xleftmargin=10pt, 33 | xrightmargin=-5pt, 34 | emph= 35 | {[2] return,fromJust,readBlockT,writeBlockT,runTransaction,retryTransaction,sequencer,runT,transfer,deposit 36 | ,sync,open,close,readBlock,writeBlock,get,put,closeFile,openFile,readB,writeB,flushBuffer }, 37 | emphstyle={[2]\color{blue}}, 38 | emph= 39 | {[1] BS,ByteString, TFile,BlockNumber,FT,Maybe,IO,FilePath,Eq,Show,Monad,Handle,Mode,State,TestDisk,Backend,Int 40 | }, 41 | emphstyle={[1]\color{castanho_ulisses}}, 42 | emph= 43 | {[3] 44 | case,class,data,deriving,do,type,else,if,import,in,infixl,infixr,instance,let, 45 | module,of,primitive,then,type,where,ReadBlock,WriteBlock,Done,Nothing,Just,True,False 46 | }, 47 | emphstyle={[3]\color{preto_ulisses}\textbf}, 48 | emph= 49 | {[4] 50 | }, 51 | emphstyle={[4]\color{green_ulises}\textbf} 52 | } 53 | 54 | \lstnewenvironment{code}[1][] 55 | {\lstset{language=HaskellU,#1}} 56 | {} 57 | 58 | \title{ Concurrent Disk-Based Transactions in Haskell} 59 | \author{Satvik Chauhan \and Pankaj More} 60 | \date{{\small \today}} 61 | % 62 | \begin{document} 63 | \maketitle 64 | % 65 | \begin{abstract} 66 | Transactions form an integral part of any multi-user concurrent database system. Most databases implement transactions by various locking mechanism. 67 | In this project, we present a lock free composable file transaction system implemented in a high level functional programming language, Haskell.Our library allows easy composition of transactions and running them as a single atomic transaction thus providing a powerful abstraction in the programmer's tool-kit. 
The implementation is not only highly concurrent but also non blocking as there are no locks on files. We have exploited the space efficient probabilistic data structure called Bloom Filters to keep primary memory requirements low. 68 | 69 | 70 | \emph{Keywords:} Concurrency, Transactions, Databases, Haskell 71 | 72 | \end{abstract} 73 | 74 | % 75 | \section{Introduction} 76 | 77 | The major problem in a concurrent system with shared resource is how to share the same resource between all the processes. The traditional way of keeping data consistent is with locks, and we notify threads of changes using condition variables. The example of such mechanism is Haskell's MVar. The drawbacks of using such mechanism is : 78 | \begin{itemize} 79 | \item Race conditions due to forgotten locks 80 | \item Deadlocks resulting from inconsistent lock ordering 81 | \item Uncaught exceptions resulting in corruption 82 | \end{itemize} 83 | 84 | A transaction is an optimistic way of achieving the same thing without hurting the concurrent part as it allows all the operations to run concurrently while providing mechanisms to roll-back the resource to a consistent state in case of any failure. 85 | 86 | Formally, a transaction is a group of operations combined into a logical unit of work. Developer uses transactions to control and maintain the consistency and integrity of each action in a transaction, despite errors that might occur in the system either due to concurrency or hardware failure. In database context a transaction on a database is considered to be set of operations performed such that database is in a consistent state before and after the transaction and in case of failure the system must provide ways to roll-back partial transactions to bring the database back into a consistent state. Transaction in the database environment mainly serve two purposes: 87 | 88 | \begin{enumerate} 89 | \item To provide reliable units of work that allow correct recovery from failures and keep a database consistent even in cases of system failure, when execution stops (completely or partially) and many operations upon a database remain uncompleted, with unclear status. 90 | \item To provide isolation between programs accessing a database concurrently. If this isolation is not provided the programs outcome are possibly erroneous. 91 | \end{enumerate} 92 | 93 | Transactions provide an "all-or-nothing" proposition, stating that each work-unit performed in a database must either complete in its entirety or have no effect whatsoever. Further, the system must isolate each transaction from other transactions, results must conform to existing constraints in the database, and transactions that complete successfully must get written to durable storage. Thus, even in case of hardware failure a transaction once commited must persist. A transaction is expected satisfy some guarantees which are often called ACID guarantees: 94 | 95 | \begin{itemize} 96 | \item \emph{Atomicity} means a transaction can end only in two ways: either successfully, in which case all its effects are written to a durable storage and persist between power failures, or unsuccessfully, in which case it has no effect, it can be assumed that this transaction never happened. 97 | \item \emph{Consistency} just means that a transaction is written correctly, so that when it completes successfully, the database is in a consistent state. 
98 | \item \emph{Isolation} means that the transaction appears to execute completely alone, even if, in fact, other transactions are running simultaneously. In other words, transaction always sees a consistent snapshot of the database and is totally unaware of the changes made by other transactions which are running concurrently with it. 99 | \item \emph{Durability} means that a successful transaction's changes are permanent and persist in all possible sorts of failures. In practice this means to be written on disk. 100 | \end{itemize} 101 | 102 | Software Transactional Memory (STM) is a new way of programming in a shared memory parallel processors. STM gives us a few simple, but powerful, tools with which we can address most of these problems. We execute a block of actions as a transaction using the atomically combinator. Once we enter the block, other threads cannot see any modifications we make until we exit, nor can our thread see any changes made by other threads. These two properties mean that our execution is isolated.Upon exit from a transaction, exactly one of the following things will occur. 103 | \begin{itemize} 104 | \item If no other thread concurrently modified the same data as us, all of our modifications will simultaneously become visible to other threads. 105 | \item Otherwise, our modifications are discarded without being performed. 106 | \end{itemize} 107 | 108 | Concurrency in GHC (Glasgow Haskell Compiler) is "lightweight", which means that both thread creation and context switching overheads are extremely low. We can create thousands of threads simultaneously without any problem. Scheduling of Haskell threads is done internally in the Haskell runtime system, and doesn't make use of any operating system-supplied thread packages. 109 | 110 | In this library we have tried to imitate and provide similar high-level interface to our transactions library as is provided in case of STM. We will see the example of our interface later. 111 | 112 | 113 | 114 | \section{Lock Based Implementations} 115 | Most of the existing implementations use locks on files for concurrency control. A simplified model of database file locking is given below. 116 | 117 | From the point of view of a single process, a database file can be in one of five locking states: 118 | \begin{itemize} 119 | \item \emph{Unlocked:} No locks are held on the database. The database may be neither read nor written. Any internally cached data is considered suspect and subject to verification against the database file before being used. Other processes can read or write the database as their own locking states permit. This is the default state. 120 | \item \emph{Shared:} The database may be read but not written. Any number of processes can hold shared locks at the same time, hence there can be many simultaneous readers. But no other thread or process is allowed to write to the database file while one or more shared locks are active. 121 | \item \emph{Reserved:} A reserved lock means that the process is planning on writing to the database file at some point in the future but that it is currently just reading from the file. Only a single reserved lock may be active at one time, though multiple shared locks can coexist with a single reserved lock. Reserved differs from pending in that new shared locks can be acquired while there is a reserved lock. 
122 | \item \emph{Pending:} A pending lock means that the process holding the lock wants to write to the database as soon as possible and is just waiting on all current shared locks to clear so that it can get an exclusive lock. No new shared locks are permitted against the database if a pending lock is active, though existing shared locks are allowed to continue. 123 | \item \emph{Exclusive:} An exclusive lock is needed in order to write to the database file. Only one exclusive lock is allowed on the file and no other locks of any kind are allowed to coexist with an exclusive lock. 124 | \end{itemize} 125 | 126 | An example of database engine which follows such model is SQlite. 127 | Multiple readers can read from the database file. 128 | Only a single writer can write to the database at any given time and all the reader have to wait till the write is complete. 129 | Hence such a model is good for heavy read-based applications but inefficient when multiple write transactions try to modify the database concurrently. 130 | 131 | \pagebreak 132 | 133 | \section{Algorithm} 134 | 135 | \subsection{Simplified version} 136 | 137 | We ignore operations other than read and write instructions. We assume that transactions may perform arbitrary computations on data in local buffers in between reads and writes. Our simplified schedules consist of only read and write instructions. 138 | 139 | Any transaction has access to following three operations : readblock, writeblock and checkpoint. 140 | 141 | 142 | 143 | \subsection{Hardware Assumptions} FT does not assume that sector writes are atomic . Even though newer disk storage systems guarantee sector writes as atomic using miniature battery based power supply just after a power failure , FT follows a pessimistic approach and assumes that no such facility might be provided by the underlying hardware. 144 | 145 | FT assumes that the operating system will buffer writes and that a write request will return before data has actually been stored onto the disk. FT assumes that the flush or fsync will not return until all pending write operations for the file that is being flushed have completed. This assumption might not hold in actual practice. It is possible that the flush and fsync system calls are broken on windows and linux. This is unfortunate as it might lead to data inconsistency during a power failure. However, there is nothing that FT can do to test for or remedy the above situation. The possibility of such scenarios can be reduced by guaranteeing good uptime quality of the hardware. 146 | 147 | FT assumes that the detection and/or correction of bit errors caused by cosmic rays, thermal noise, quantum fluctuations, device driver bugs, or other mechanisms, is the responsibility of the underlying hardware and operating system. FT does not add any redundancy to the database file for the purpose of detecting corruption or I/O errors. FT assumes that the data it reads is exactly the same data that it previously wrote. 148 | 149 | By default, FT assumes that an operating system call to write a range of bytes will not damage or alter any bytes outside of that range even if a power lose or OS crash occurs during that write. 150 | 151 | \subsection{Commit Workflow} 152 | For maintaining the transaction information and efficient execution of concurrent transactions , we utilize concepts like write ahead logging (WAL) in SQlite, bloomfilters, serializability, etc. 
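As a small illustration of the journal membership test described next (a sketch, not the library code; \texttt{mightContain} is a hypothetical helper), such queries can be written against the immutable \texttt{Data.BloomFilter} API that Transactions.lhs already uses:

\begin{code}
-- Sketch only: mightContain is a hypothetical helper, not library code.
import qualified Data.BloomFilter as BF
import Data.BloomFilter.Hash (cheapHashes)

type BlockNumber = Int

-- True means "maybe present, confirm with actual journal I/O";
-- False is definitive: the journal cannot contain the block.
mightContain :: BF.Bloom BlockNumber -> BlockNumber -> Bool
mightContain bl bn = BF.elemB bn bl

demo :: Bool
demo = mightContain (BF.insertB 42 (BF.emptyB (cheapHashes 20) 4096)) 42
\end{code}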
153 | 154 | We use bloomfilters to find out rapidly and memory-efficiently, whether a block is present in a journal or not. 155 | A Bloom filter is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set. False positives are possible, but false negatives are not; i.e. a query returns either "inside set (may be wrong)" or "definitely not in set". 156 | Every journal has a corresponding bloomfilter. Bloomfilters instead of indexes are kept in memory to keep memory requirements low and provide good read performance at the same time. 157 | Bloomfilter keeps track of the list of blocks in a journal file. 158 | 159 | We store a global queue of committed(valid) journals. 160 | 161 | If a transaction is purely read-based , it need not keep any journal. 162 | If the transaction is in ReadWrite Mode , it will have its own journal. 163 | 164 | ReadBlock first tries reading from its own journal if it exists. Then it tries to read from the queue of commited journals. It doesnt actually need to perform I/O on every journal to find the block. It checks the bloomfilter associated with each journal and if bloomfilter reports true , it actually performs I/O to find out if it is true. As a side note, it is obvious that the goal is to minimize the false rate positive of bloomfilter so that such costly disk accesses can be minimized. Proper real time benchmarking and fine-tuning of bloom filter parameters can reduce false-positive rate and optimize the performance. If it really finds the block , then it fetches the data from the disk and returns it. If the bloomfilter had signalled a false-positive , it needs to continue its search further down the queue recursively. If the queue is fully traversed and the block is not found , it finally reads from the main database file. 165 | 166 | WriteBlock simply writes the block data to its own journal file. It might latter fail to commit if there are conflicts. This is discussed below. 167 | 168 | When a transaction needs to commit , it first checks whether there is a conflict between the blocks that it read and the blocks changed between the file version when it started and the current file version. If a conflict exists , the transaction must abort. If there are no such conflicts and it was a read only transaction then it commits successfully. Whereas, in case of a read-write transaction , it adds its journal to the journal queue with the incremented file version. It also increments the file version of the main database file. It flushes the required buffers so that the data gets actually written to the disk to guarantee durability. 169 | 170 | 171 | 172 | 173 | \subsection{Acid Guarantees} 174 | \begin{itemize} 175 | \item Atomicity: 176 | Transaction either commits or aborts. 177 | \item Consistency: 178 | The set of valid journals and transaction file is always consistent 179 | \item Isolation: 180 | Since the journal of a concurrently running transaction is not yet valid(as it is not yet committed) , the journal must not be in the global journal queue. Hence , a concurrent transaction can never see the change made by another running transaction. 181 | \item Durability: 182 | Once a transaction commits , fsync must ensure that the data is actually written onto the disk. Hence assuming our calls to fsync are always called at the appropriate time , the burden of durability rests on the fsync system call correctness in the host operating system. 
183 | \end{itemize} 184 | 185 | \subsection{Sequencer} 186 | If there are large number of transactions executing concurrently , the number of journals can grow large extremely fast. There is a need to keep the number of journals in control else it would lead to extremely bad read performance. A checkpoint operation transfers all the data in journal files to the main database file. Hence , frequent checkpointing would reduce the overhead of read operations. 187 | 188 | \subsection{Read-write Trade-off} 189 | There is a trade-off between average read performance and average write performance. To maximize the read performance, one wants to keep the number of journal files as small as possible and hence run checkpoints frequently. To maximize write performance, one wants to amortize the cost of each checkpoint over as many writes as possible, meaning that one wants to run checkpoints infrequently and let the no of journal files grow as large as possible before each checkpoint. The decision of how often to run checkpoints may therefore vary from one application to another depending on the relative read and write performance requirements of the application. 190 | 191 | 192 | 193 | \pagebreak 194 | \section{Implementation Details} 195 | 196 | Here we will explain some important parts of our implementations which are directly taken from the lhs files. The explanation is in the blog style of writing. 197 | 198 | Lets try to capture the transaction into a data-type first. Here 199 | we are implementing a very basic version of transactions. So our 200 | transaction system only provide two type of operations to be performed 201 | on the file. 202 | 203 | \begin{enumerate} 204 | \item \textbf{ReadBlock} is to read the data from the given block number. 205 | \item \textbf{WriteBlock} is to write the given data on the block number provide. 206 | \end{enumerate} 207 | 208 | We can also think of adding operations like append block, modify block 209 | etc., but to keep it simple we only support these two basic operations. 210 | Now lets look at the data definition of the File Transaction (FT) data 211 | type. 212 | 213 | \begin{code}[name=Transactions] 214 | -- Transactions.lhs 215 | -- | Transaction Datatype 216 | data FT a = 217 | Done a | 218 | ReadBlock BlockNumber (BS.ByteString -> FT a) | 219 | WriteBlock BlockNumber BS.ByteString (FT a) 220 | \end{code} 221 | 222 | ByteString is made explicit as all the data needs to be serialized to ByteString before writing. 223 | The types of ReadBlock and WriteBlock looks a little odd. We will see later how it helps in actually passing the continuations. 224 | Lets see the monad definiton of the FT datatype. 225 | 226 | \begin{code}[name=Transactions] 227 | -- Transactions.lhs 228 | -- | Monad Definition for the Transaction. 229 | instance Monad FT where 230 | return = Done 231 | m >>= f = case m of 232 | Done a -> f a 233 | ReadBlock bn c -> ReadBlock bn (\i -> c i >>= f) 234 | WriteBlock bn x c -> WriteBlock bn x (c >>= f) 235 | \end{code} 236 | 237 | Here we will see how the types of ReadBlock and WriteBlock actually help 238 | in our continuation passing style of programming. Lets see a simple 239 | example of interface our implementation provides. 240 | Consider the famous banking example for transactions. We want to 241 | transfer x fund from account A to account B. 242 | Lets assume that fund informations of A and B are stored in the same 243 | file at block number a and b respectively. 
244 | First we will see the basic example to deposit amount to an account. 245 | Here is a function which deposits x amount to the given account. 246 | 247 | \begin{code} 248 | deposit a x = do 249 | amount <- ReadBlock a return 250 | WriteBlock a (amount + x) (return ()) 251 | \end{code} 252 | 253 | Lets see how this is translated to explicit notation without do 254 | notation. 255 | 256 | \begin{code}[stepnumber=2] 257 | ReadBlock a return >>= (\amount -> WriteBlock a 258 | (amount +x) (return())) 259 | ReadBlock a (\i -> return i >>= 260 | \amount -> WriteBlock a (amount + x) (return())) 261 | ReadBlock a (\i -> Done i >>= 262 | \amount -> WriteBlock a (amount + x) (return())) 263 | ReadBlock a (\amount -> 264 | WriteBlock a (amount + x) (return())) 265 | \end{code} 266 | 267 | Looks like it got transformed to whatever we wanted. This is how continuation passing style actually works. 268 | It was a little frustrating to write return while writing ReadBlock and 269 | WriteBlock. So lets define some helpers to help us avoiding the 270 | repetitions. 271 | 272 | \begin{code}[name=Transactions] 273 | -- Transactions.lhs 274 | -- | readBlockT to be used inside the FT Monad 275 | readBlockT :: BlockNumber -> FT BS.ByteString 276 | readBlockT = flip ReadBlock return 277 | -- | writeBlockT to be used inside the FT Monad 278 | writeBlockT :: BlockNumber -> BS.ByteString -> FT () 279 | writeBlockT v x = WriteBlock v x $ return () 280 | \end{code} 281 | 282 | Now the above deposit function can easily be written as: 283 | 284 | \begin{code} 285 | deposit a x = do 286 | amount <- readBlockT a 287 | writeBlockT a (amount + x) 288 | \end{code} 289 | 290 | Now we want to actually perform the transactions satisfying all the ACID 291 | guarantees. So we need to write a function to actually convert our 292 | Transactions from FT monad to IO monad and perform them. 293 | According to the semantics of a transaction, a transaction can either 294 | fail or succeed. So we should provide atleast two types of functions to 295 | run a transaction which are as follows: 296 | 297 | \begin{code}[name=Transactions] 298 | -- Transactions.lhs 299 | -- | Runs the given transaction on the file. 300 | -- Transaction may fail in which case it returns Nothing. 301 | runTransaction :: FT a -> TFile -> IO (Maybe a) 302 | runTransaction = runT Nothing False 303 | -- | Runs the transaction on the file. 304 | -- If transaction fails then reruns it 305 | -- with higher priority. 306 | retryTransaction :: FT a -> TFile -> IO a 307 | retryTransaction ft tFile = fromJust <$> 308 | runT Nothing True ft tFile 309 | \end{code} 310 | 311 | At this point before implementing anything else we are interested in how 312 | we will be actually using them. Here I will also introduce you to the 313 | power of composing two transactions and running them as one. Lets 314 | comeback to our backing example. 315 | 316 | \begin{code}[name=Transactions] 317 | transfer a b x = do 318 | deposit a (-x) 319 | deposit b x 320 | \end{code} 321 | 322 | Here is the function to remove x amount from account A and deposit it to 323 | the account B. We have implemented a very loose semantics here as to not 324 | check if A's balance is less than 0 etc.. I just wanted to show the power 325 | of composing functions. Now we can just do runTransaction on transfer to 326 | run this transaction. 
327 | 328 | \begin{code}[name=Transactions] 329 | -- Transactions.lhs 330 | runTransaction (transfer a b 100) 331 | \end{code} 332 | 333 | The semantics of runTransaction automatically 334 | takes care of all the possible failures and roll-back in case of 335 | transaction failure. 336 | 337 | runTransaction actually has to take care of lots of things: 338 | 339 | \begin{itemize} 340 | \item Gets the file Version at which the transaction started. 341 | \item Performs the transactions on the Journal File 342 | \item Then tries to commit the transaction.Checks the present version against the version at which it started. Here comes the idea of space efficient Bloom filters which are actually used to check whether the current transaction should commit successfully or not. This operation is in a critical section as it might leave the file in inconsistent state if it is not isolated. 343 | \item return the output if transaction succeeds otherwise adds the failed transactions to the failed queue. If retryTransaction was used then it again tries to perform the transaction. 344 | \end{itemize} 345 | 346 | All the journals of the transactions get aggregated over time which might result in poor read performances over time. So we need to actually checkpoint the committed journals back to the database file. 347 | 348 | \begin{code}[name=Transactions] 349 | -- Transactions.lhs 350 | sequencer :: TFile -> IO () 351 | \end{code} 352 | 353 | Sequencer actually does this. It checks for the committed journal files, writes them to the actual database and removes them. 354 | 355 | \section{Future Improvements} 356 | None of the implementation is good for all the purposes. This implementation is good when we have lots of not colliding write transactions. If too many transaction collide and fail then we get poor concurrency. Thus other goal would be to add locking transactions which actually perform the write transactions sequentially similar to sqlite. 357 | Other step will be in the direction of heavily testing the implementation. As it is always very difficult to test alone concurrent applications, we on the other hand also have lots of file operations. The quickcheck library in Haskell provides very good testing mechanism but it requires to separate the backed from the IO monad and to provide an abstraction over it. 358 | Here is our basic approach of creating a pure interface to IO which actually don't perform IO but simulate the file-system. 359 | 360 | Here the basic idea is to capture the Backend in a typeclass. We will be exploiting the type-families extension of Haskell to separate backend from the actual transaction implementation. This has great advantage to replace our backend with whatever kind we want. 361 | 362 | \begin{code}[name=Backend,firstnumber=1] 363 | -- Backend.hs 364 | class (Show a,Monad m) => Backend b m a | b -> m where 365 | data Handle :: * -> * 366 | type BlockNumber :: * 367 | open :: FilePath -> Mode -> m (Handle b) 368 | close :: Handle b -> m () 369 | readBlock ::(Eq BlockNumber)=>Handle b->BlockNumber->m a 370 | writeBlock ::(Eq BlockNumber)=>Handle b->BlockNumber->a->m () 371 | sync :: Handle b -> m () 372 | \end{code} 373 | 374 | We have added the basic functions which we need in the transaction implementation. This greatly condense the side effects possible to only these functions. 375 | 376 | Here is the pure implementation of the file system in terms of state monad. 
377 | 378 | \begin{code}[name=Backend,firstnumber=last] 379 | -- Backend.hs 380 | instance Show a => Backend (TestDisk a) (State (TestDisk a)) a where 381 | data Handle (TestDisk a) = Handle FilePath 382 | type BlockNumber = Int 383 | open fp m = do 384 | disk <- get 385 | put $ openFile disk fp m 386 | return $ Handle fp 387 | close (Handle fp) = do 388 | disk <- get 389 | put $ closeFile disk fp 390 | readBlock (Handle fp) bn = do 391 | t <- get 392 | case readB t fp bn buffers of 393 | Nothing -> error "Block Not Found" 394 | Just a -> return a 395 | writeBlock (Handle fp) bn a = do 396 | t <- get 397 | put $ writeB t fp bn a 398 | sync (Handle fp) = do 399 | t <- get 400 | put $ flushBuffer t fp 401 | \end{code} 402 | 403 | Such an implementation allows us to test lots of the properties which are otherwise difficult to test. It also helps in explicitly specifying what kind of expectation and assumptions you make on your backend. 404 | 405 | The very simple representation of the disk simulated as a map between FilePath and File. 406 | We also have buffers to simulate sync. This will help to simulate power failures and hardware crashes as the data present in the buffers are lost but not the disk. \\ 407 | The File representation is very simple. Just a map between block number and the data present at that location. 408 | 409 | \begin{code}[name=Backend] 410 | -- Backend.hs 411 | data TestDisk a = TestDisk { 412 | disk :: M.Map FilePath (File a) 413 | , buffers :: M.Map FilePath (File a) 414 | , bufferSize :: Int -- Current buffer Size 415 | , openFiles :: M.Map FilePath Mode 416 | } deriving (Eq,Show) 417 | data File a = File { 418 | blocks :: M.Map Int a 419 | , size :: Int 420 | } deriving (Eq, Show) 421 | 422 | data Mode = ReadWrite | Read | Write deriving (Eq,Show) 423 | \end{code} 424 | 425 | This was a very basic structure of file system which we will be using to write test-cases using quick check. Another major thing left to abstract is concurrency and IORefs. This will be our next focus of work in the future. Another area of focus will be into optimizing the implementation and comparing its performance with sqlite. This will greatly improve our understanding whether Haskell is suitable for writing industrial strength databases. 426 | 427 | 428 | \bibliographystyle{IEEEbib} 429 | \begin{thebibliography}{10} 430 | \bibitem[1]{stm}Anthony Discolo, Tim Harris, Simon Marlow, Simon Peyton Jones, and Satnam Singh. Lock-free data structures using STMs in Haskell. In Eighth International Symposium on Functional and Logic Programming (FLOPS’06), April 2006. 431 | \bibitem[2]{sqlite}Atomic Commit in sqlite, \url{http://www.sqlite.org/atomiccommit.html} 432 | \bibitem[3]{bloomfilter}Bloom Filter, \url{http://en.wikipedia.org/wiki/Bloom_filter} 433 | \bibitem[4]{WAL}Write-Ahead Logging, \url{http://www.sqlite.org/draft/wal.html} 434 | \end{thebibliography} 435 | 436 | \pagebreak 437 | 438 | 439 | 440 | \begin{figure}[htb] 441 | \centering 442 | \includegraphics[angle=90]{1} 443 | \end{figure} 444 | 445 | \pagebreak 446 | 447 | \begin{figure}[htb] 448 | \centering 449 | \includegraphics[angle=90]{2} 450 | \end{figure} 451 | 452 | \pagebreak 453 | 454 | \begin{figure}[htb] 455 | \centering 456 | \includegraphics[angle=90]{2} 457 | \end{figure} 458 | 459 | \pagebreak 460 | 461 | \end{document} 462 | --------------------------------------------------------------------------------