├── README
├── Setup.hs
├── refs
│   ├── 1.png
│   ├── 2.png
│   ├── 3.png
│   ├── report.pdf
│   ├── Makefile
│   ├── slidy
│   │   └── slidy.css
│   ├── disk_based_transactions.md
│   └── report.tex
├── src
│   ├── Makefile
│   ├── System
│   │   └── HaskDB
│   │       ├── Fsync.hsc
│   │       ├── Fsync.hs
│   │       ├── Makefile
│   │       ├── algorithm.md
│   │       ├── FileHeader.lhs
│   │       ├── lock-based-implementation.md
│   │       ├── Introduction.md
│   │       ├── Backend_test.hs
│   │       ├── Backend.hs
│   │       ├── FileHandling.lhs
│   │       ├── TransactionFH.lhs
│   │       ├── Journal.lhs
│   │       └── Transactions.lhs
│   ├── abstract.md
│   └── tTest.hs
├── LICENSE
└── HaskDB.cabal
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 |
--------------------------------------------------------------------------------
/refs/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/course/HaskDB/master/refs/1.png
--------------------------------------------------------------------------------
/refs/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/course/HaskDB/master/refs/2.png
--------------------------------------------------------------------------------
/refs/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/course/HaskDB/master/refs/3.png
--------------------------------------------------------------------------------
/refs/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/course/HaskDB/master/refs/report.pdf
--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
1 | clean :
2 | 	rm -f *.header
3 | 	rm -f *.trans
4 | 	rm -f *.journal
5 |
--------------------------------------------------------------------------------
/refs/Makefile:
--------------------------------------------------------------------------------
1 | all: disk_based_transactions.md
2 | 	pandoc --data-dir=datadir --self-contained -t slidy -s disk_based_transactions.md -o disk_based_transactions-slidy.html
3 | 	pandoc --data-dir=datadir --self-contained -t slidy -s poster.md -o poster.html
4 | 	pandoc -t s5 -s disk_based_transactions.md -o disk_based_transactions-s5.html
5 | 	pandoc -t dzslides -s disk_based_transactions.md -o disk_based_transactions-dzslides.html
6 | 	# pandoc -t beamer -s disk_based_transactions.md -o disk_based_transactions.pdf
7 |
8 |
9 | clean:
10 | 	rm -rf *.html *.pdf
11 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Fsync.hsc:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ForeignFunctionInterface #-}
2 | module System.HaskDB.Fsync (sync) where
3 |
4 | import Foreign.C.Error (throwErrnoIfMinus1_)
5 | import Foreign.C.Types (CInt)
6 | import System.Posix.Types (Fd(..))
7 | import System.Posix.IO (handleToFd)
8 | import System.IO
9 |
10 | #include <unistd.h>
11 |
12 | foreign import ccall "fsync"
13 |     c_fsync :: CInt -> IO CInt
14 |
15 | fsync :: Fd -> IO ()
16 | fsync (Fd fd) = throwErrnoIfMinus1_ "fsync" $ c_fsync fd
17 |
18 | sync :: Handle -> IO ()
19 | sync fh = do
20 |     fd <- handleToFd fh
21 |     fsync fd
22 |
23 |
24 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Fsync.hs:
--------------------------------------------------------------------------------
1 | module System.HaskDB.Fsync (sync) where
2 | import Foreign.C.Error
3 | import Foreign.C.Types
4 | import qualified System.IO as IO
5 | import qualified System.IO.Error as IO
6 | import GHC.IO.FD (FD(..))
7 | import GHC.IO.Handle.Types (Handle__(..))
8 | import GHC.IO.Handle.Internals (wantWritableHandle)
9 | import Data.Typeable
10 | foreign import ccall "unistd.h fsync" c_fsync :: CInt -> IO CInt
11 | sync :: IO.Handle -> IO ()
12 | sync h = do
13 |     IO.hFlush h
14 |     wantWritableHandle "hSync" h $ fsyncH
15 |     where
16 |         fsyncH Handle__ {haDevice = dev} = maybe (return ()) fsyncD $ cast dev
17 |         fsyncD FD {fdFD = fd} = throwErrnoPathIfMinus1_ "fsync" (show h)
18 |             (c_fsync fd)
19 |
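-- A hypothetical usage sketch (not part of the original module): hFlush
-- pushes Haskell's buffered writes to the OS, and sync/fsync then forces
-- the OS page cache onto the device, so the data survives a crash once
-- sync returns.
--
--     import qualified Data.ByteString.Char8 as BSC
--     main = IO.withBinaryFile "data.bin" IO.WriteMode $ \h -> do
--         BSC.hPut h (BSC.pack "payload")
--         sync h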
--------------------------------------------------------------------------------
/src/abstract.md:
--------------------------------------------------------------------------------
1 | ## Abstract
2 |
3 | Transactions form an integral part of any multi-user concurrent database system.
4 | Most databases implement transactions through various locking mechanisms. We present
5 | here a lock-free, composable file-transaction system implemented in a high-level
6 | functional programming language, Haskell. The use of Haskell allows us to easily
7 | compose transactions and run them as a single atomic transaction, thus providing
8 | a powerful abstraction for the programmer's use. The implementation is not
9 | only highly concurrent but also non-blocking, as there are no locks on files.
10 | We have exploited the space-efficient probabilistic data structure
11 | called the Bloom filter to keep primary-memory requirements as low as possible.
12 |
13 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Makefile:
--------------------------------------------------------------------------------
1 | markdown : *.lhs
2 | 	pandoc -f markdown+lhs Introduction.md -o 01.md
3 | 	pandoc -f markdown+lhs lock-based-implementation.md -o 02.md
4 | 	pandoc -f markdown+lhs algorithm.md -o 03.md
5 | 	pandoc -f markdown+lhs Transactions.lhs -o 04.md
6 | 	pandoc -f markdown+lhs Journal.lhs -o 05.md
7 | 	pandoc -f markdown+lhs TransactionFH.lhs -o 06.md
8 | 	pandoc -f markdown+lhs FileHandling.lhs -o 07.md
9 | 	pandoc -f markdown+lhs FileHeader.lhs -o 08.md
10 |
11 | 	rm -f report.md
12 | 	touch report.md
13 | 	cat 01.md >> report.md
14 | 	cat 02.md >> report.md
15 | 	cat 03.md >> report.md
16 | 	cat 04.md >> report.md
17 | 	cat 05.md >> report.md
18 | 	cat 06.md >> report.md
19 | 	cat 07.md >> report.md
20 | 	cat 08.md >> report.md
21 |
22 | 	pandoc -f markdown+lhs report.md -o report.pdf
23 | clean :
24 | 	rm -f *.aux
25 | 	rm -f *.log
26 | 	rm -f *.ptb
27 | 	rm -f *.pdf
28 |
29 | cleanmd :
30 | 	rm -f *.md
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c)2012, Satvik Chauhan , Pankaj More
2 |
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 |     * Redistributions of source code must retain the above copyright
9 |       notice, this list of conditions and the following disclaimer.
10 |
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 |
16 |     * Neither the name of Satvik Chauhan , Pankaj More nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/src/tTest.hs:
--------------------------------------------------------------------------------
1 | import System.HaskDB.Transactions
2 | import System.HaskDB.TransactionFH
3 | import System.HaskDB.FileHeader
4 | import System.HaskDB.FileHandling
5 | import qualified System.HaskDB.FileHandling as FH
6 | import qualified Data.ByteString as BS
7 | import qualified Data.ByteString.Char8 as BSC
8 | import Control.Concurrent
9 | import Control.Applicative
10 |
11 | rand size = BS.take size $ BSC.pack $ take size $ cycle "This is a fttest data"
12 | rand2 size = BS.take size $ BSC.pack $ take size $ cycle "Hello"
13 |
14 | fttest = do
15 |     writeBlockT 0 $ rand 4096
16 |     writeBlockT 1 $ rand 4096
17 |     writeBlockT 2 $ rand2 4096
18 |     writeBlockT 3 $ rand 4096
19 |     writeBlockT 4 $ rand2 4096
20 |     writeBlockT 0 $ rand2 4096
21 |     readBlockT 0
22 |
23 | fttest2 = do
24 |     readBlockT 0
25 |
26 | fttest3 = do
27 |     writeBlockT 0 $ rand2 4096
28 |
29 | main = do
30 |     newDB <- openTF "fttest.dat"
31 |     forkIO $ runTransaction fttest newDB >>= print
32 |     forkIO $ retryTransaction fttest newDB >>= print
33 |     forkIO $ retryTransaction fttest newDB >>= print
34 |     forkIO $ retryTransaction fttest newDB >>= print
35 |     forkIO $ retryTransaction fttest newDB >>= print
36 |     forkIO $ retryTransaction fttest newDB >>= print
37 |     forkIO $ retryTransaction fttest newDB >>= print
38 |     forkIO $ retryTransaction fttest newDB >>= print
39 |     forkIO $ retryTransaction fttest newDB >>= print
40 |     forkIO $ retryTransaction fttest newDB >>= print
41 |     forkIO $ retryTransaction fttest newDB >>= print
42 |     t <- getFileVersion (fHandle newDB)
43 |     print t
44 |     {-sequencer newDB-}
45 |
46 |
--------------------------------------------------------------------------------
/src/System/HaskDB/algorithm.md:
--------------------------------------------------------------------------------
1 | # Algorithm
2 |
3 | Any transaction has access to three operations:
4 |
5 | * readblock
6 | * writeblock
7 | * checkpoint
8 |
9 | When a write operation is performed, the change is stored in a separate *journal* file per transaction. When a read operation is performed, it reads from the latest file which contains the block data.
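This read path can be sketched as follows (a simplified version of `readBlockJ` from `TransactionFH.lhs`; the actual implementation also consults a per-journal Bloom filter before probing a journal):

```haskell
readLatest :: Maybe Journal   -- the transaction's own journal, if any
           -> [Journal]       -- valid journals, newest first
           -> FH.FHandle      -- the main database file
           -> BlockNumber
           -> IO BS.ByteString
readLatest own journals db bn = go (maybe journals (: journals) own)
  where
    go []       = FH.readBlock db bn        -- fall back to the database
    go (j : js) = do
        found <- readFromJournal j bn       -- :: IO (Maybe BS.ByteString)
        maybe (go js) return found
```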
10 |
11 | Committing a transaction increments the *file version* of the transaction file and marks its own journal file as valid.
12 |
13 | The set of all valid journal files and the main transaction file together represent the current state of the database at any given point of time.
14 |
15 | A transaction fails/aborts when the readblocks of the current transaction conflict with the blocks changed between the start of the transaction and the current *version* (see the sketch at the end of this section).
16 |
17 | Write operations always go through without any waiting period, and hence they are optimal.
18 | The time complexity of a read operation is proportional to the number of journal files on disk.
19 |
20 | Checkpointing transfers all the data in journal files to the main database file. Hence, frequent checkpointing reduces the overhead of read operations.
21 |
22 | There is a tradeoff between average read performance and average write performance. To maximize read performance, one wants to keep the number of journal files as small as possible and hence run checkpoints frequently. To maximize write performance, one wants to amortize the cost of each checkpoint over as many writes as possible, meaning that one wants to run checkpoints infrequently and let the number of journal files grow as large as possible before each checkpoint. The decision of how often to run checkpoints may therefore vary from one application to another depending on the relative read and write performance requirements of the application.
23 |
24 |
25 |
26 |
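The version-range conflict test can be sketched as below, mirroring `checkFailure` from `TransactionFH.lhs`: using each journal's Bloom filter of written blocks, a transaction is aborted if any block it read *may* have been overwritten since it started. Bloom-filter false positives can abort a transaction needlessly, but a real conflict is never missed:

```haskell
conflicts :: [BF.Bloom BlockNumber]  -- write sets of journals committed
                                     -- between our start version and now
          -> [BlockNumber]           -- blocks this transaction has read
          -> Bool
conflicts writeSets readSet =
    or [ elemB bn ws | ws <- writeSets, bn <- readSet ]
    -- elemB is the Bloom-filter membership test already used in the repo
```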
--------------------------------------------------------------------------------
/src/System/HaskDB/FileHeader.lhs:
--------------------------------------------------------------------------------
1 | > module System.HaskDB.FileHeader where
2 | > import qualified System.HaskDB.FileHandling as FH
3 | > import qualified Data.ByteString as BS
4 | > import System.IO
5 | > import Data.Serialize
6 | > import Data.IORef
7 | >
8 | > -- | Header of the database to store the schema and other related information. Only one thread can operate on the header
9 | > -- at a time. Other threads will be blocked until allowed.
10 | > --
11 | > data Header = Header
12 | >     {fileVersion :: Integer
13 | >     }
14 | > instance Serialize Header where
15 | >     put h = put $ fileVersion h
16 | >     get = do
17 | >         w <- get
18 | >         return $ Header w
19 | >
20 | > -- | Creates an empty header file.
21 | > {-createHeader :: FH.FHandle -> IO () -}
22 | > {-createHeader fh = do -}
23 | >     {-fp <- openFile (getHeaderName fh) WriteMode -}
24 | >     {-hClose fp -}
25 | >
26 | > -- | Change the FileVersion.
27 | > -- TODO : Make read and write together atomic and non failing in case of exception.
28 | > changeFileVersion :: FH.FHandle -> IO ()
29 | > changeFileVersion fh = do
30 | >     atomicModifyIORef (FH.fileVersion fh) (\a -> (a+1,()))
31 | >     {-writeIORef (FH.fileVersion fh) bs -}
32 | >     {-BS.writeFile headerPath (encode $ Header bs)-}
33 | >     {-where -}
34 | >         {-headerPath = getHeaderName fh -}
35 | >
36 | > -- | Get the current version of the file.
37 | > getFileVersion :: FH.FHandle -> IO Integer
38 | > getFileVersion fh = readIORef $ FH.fileVersion fh
39 | >     {-header <- BS.readFile $ getHeaderName fh-}
40 | >     {-case decode header of -}
41 | >         {-Left err -> error err -}
42 | >         {-Right d -> return $ fileVersion d-}
43 | >
44 | >
45 | > -- | Given a file handle, return the header file name
46 | > getHeaderName :: FH.FHandle -> FilePath
47 | > getHeaderName fh = FH.filePath fh ++ ".header"
48 |
49 |
50 |
--------------------------------------------------------------------------------
/HaskDB.cabal:
--------------------------------------------------------------------------------
1 | -- HaskDB.cabal auto-generated by cabal init. For additional options,
2 | -- see
3 | -- http://www.haskell.org/cabal/release/cabal-latest/doc/users-guide/authors.html#pkg-descr.
4 | -- The name of the package.
5 | Name: HaskDB
6 |
7 | -- The package version. See the Haskell package versioning policy
8 | -- (http://www.haskell.org/haskellwiki/Package_versioning_policy) for
9 | -- standards guiding when and how versions should be incremented.
10 | Version: 0.1
11 |
12 | -- A short (one-line) description of the package.
13 | Synopsis: Persistent database in Haskell
14 |
15 | -- A longer description of the package.
16 | -- Description:
17 |
18 | -- The license under which the package is released.
19 | License: BSD3
20 |
21 | -- The file containing the license text.
22 | License-file: LICENSE
23 |
24 | -- The package author(s).
25 | Author: Satvik Chauhan , Pankaj More
26 |
27 | -- An email address to which users can send suggestions, bug reports,
28 | -- and patches.
29 | Maintainer: mystic.satvik@gmail.com
30 |
31 | -- A copyright notice.
32 | -- Copyright:
33 |
34 | Category: System
35 |
36 | Build-type: Simple
37 |
38 | -- Extra files to be distributed with the package, such as examples or
39 | -- a README.
40 | -- Extra-source-files:
41 |
42 | -- Constraint on the version of Cabal needed to build this package.
43 | Cabal-version: >=1.2
44 |
45 |
46 | Library
47 |     -- Modules exported by the library.
48 |     Exposed-modules: System.HaskDB.FileHandling, System.HaskDB.Journal, System.HaskDB.Transactions, System.HaskDB.Fsync
49 |     Hs-Source-Dirs : src/
50 |
51 |     -- Packages needed in order to build this package.
52 |     Build-depends: base >= 4 && < 5, cereal >= 0.3.5, bytestring, text, containers, random >= 1.0, directory >= 1.1, dequeue >= 0.1.5, bloomfilter >= 1.2.6, unix
53 |
54 |     -- Modules not exported by this package.
55 |     -- Other-modules:
56 |
57 |     -- Extra tools (e.g. alex, hsc2hs, ...) needed to build the source.
58 |     -- Build-tools:
59 |
60 |
--------------------------------------------------------------------------------
/src/System/HaskDB/lock-based-implementation.md:
--------------------------------------------------------------------------------
1 | Most of the existing implementations use locks on files for concurrency control.
2 |
3 | # A simplified model of database file locking
4 |
5 | From the point of view of a single process, a database file can be in one of five locking states:
6 |
7 | * **UNLOCKED** : No locks are held on the database. The database may be neither read nor written. Any internally cached data is considered suspect and subject to verification against the database file before being used. Other processes can read or write the database as their own locking states permit. This is the default state.
8 | * **SHARED** : The database may be read but not written.
Any number of processes can hold SHARED locks at the same time, hence there can be many simultaneous readers. But no other thread or process is allowed to write to the database file while one or more SHARED locks are active.
9 | * **RESERVED** : A RESERVED lock means that the process is planning on writing to the database file at some point in the future but that it is currently just reading from the file. Only a single RESERVED lock may be active at one time, though multiple SHARED locks can coexist with a single RESERVED lock. RESERVED differs from PENDING in that new SHARED locks can be acquired while there is a RESERVED lock.
10 | * **PENDING** : A PENDING lock means that the process holding the lock wants to write to the database as soon as possible and is just waiting on all current SHARED locks to clear so that it can get an EXCLUSIVE lock. No new SHARED locks are permitted against the database if a PENDING lock is active, though existing SHARED locks are allowed to continue.
11 | * **EXCLUSIVE** : An EXCLUSIVE lock is needed in order to write to the database file. Only one EXCLUSIVE lock is allowed on the file and no other locks of any kind are allowed to coexist with an EXCLUSIVE lock.
12 |
13 | An example of a database engine that follows this model is SQLite.
14 | Multiple readers can read from the database file.
15 | Only a single writer can write to the database at any given time, and all the readers have to wait till the write is complete.
16 | Hence such a model is good for heavy read-based applications but inefficient when multiple write transactions try to modify the database concurrently. The rules above can be restated compactly in code, as shown below.
17 |
18 |
19 |
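A sketch of the two acquisition rules that matter most (`LockState` and the two predicates are illustrative names, not part of this repository):

```haskell
data LockState = Unlocked | Shared | Reserved | Pending | Exclusive
    deriving (Eq, Show)

-- May another process acquire a *new* SHARED lock while `held` is active?
-- RESERVED still admits new readers; PENDING and EXCLUSIVE refuse them.
newReaderAllowed :: LockState -> Bool
newReaderAllowed held = held `elem` [Unlocked, Shared, Reserved]

-- May a process acquire EXCLUSIVE, given the locks everyone else holds?
-- EXCLUSIVE coexists with no other lock of any kind.
writerAllowed :: [LockState] -> Bool
writerAllowed = all (== Unlocked)
```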
--------------------------------------------------------------------------------
/src/System/HaskDB/Introduction.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 |
3 | A transaction is a group of operations combined into a logical unit of work. Developers use transactions to control and maintain the consistency and integrity of each action in a transaction, despite errors that might occur in the system either due to concurrency or hardware failure. In a database context, a transaction on a database is considered to be a set of operations performed such that the database is in a consistent state before and after the transaction; in case of failure the system must provide ways to roll back partial transactions to bring the database back into a consistent state. Transactions in the database environment mainly serve two purposes:
4 |
5 | 1. To provide reliable units of work that allow correct recovery from failures and keep a database consistent even in cases of system failure, when execution stops (completely or partially) and many operations upon a database remain uncompleted, with unclear status.
6 | 2. To provide isolation between programs accessing a database concurrently. If this isolation is not provided, the programs' outcomes are possibly erroneous.
7 |
8 | Transactions provide an "all-or-nothing" proposition, stating that each work-unit performed in a database must either complete in its entirety or have no effect whatsoever. Further, the system must isolate each transaction from other transactions, results must conform to existing constraints in the database, and transactions that complete successfully must get written to durable storage. Thus, even in case of hardware failure, a transaction once committed must persist. A transaction is expected to satisfy the ACID guarantees.
9 |
10 | * **Atomicity** means a transaction can end in only two ways: either successfully, in which case all its effects are written to durable storage and persist between power failures, or unsuccessfully, in which case it has no effect, and it can be assumed that this transaction never happened.
11 | * **Consistency** just means that a transaction is written correctly, so that when it completes successfully, the database is in a consistent state.
12 | * **Isolation** means that the transaction appears to execute completely alone, even if, in fact, other transactions are running simultaneously. In other words, a transaction always sees a consistent snapshot of the database and is totally unaware of the changes made by other transactions which are running concurrently with it.
13 | * **Durability** means that a successful transaction's changes are permanent and persist across all possible sorts of failures. In practice this means being written to disk.
14 |
15 |
16 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Backend_test.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE FlexibleInstances #-}
2 | import System.HaskDB.Backend
3 | import Test.QuickCheck
4 | import Control.Monad
5 | import qualified Data.Map as M
6 | import Test.HUnit
7 | import Test.Framework (defaultMain, testGroup)
8 | import Test.Framework.Providers.QuickCheck2 (testProperty)
9 |
10 | instance Arbitrary Mode where
11 |     arbitrary = oneof $ map return [Write , ReadWrite , Read]
12 |
13 | instance (Ord a, Arbitrary a,Arbitrary b) => Arbitrary (M.Map a b) where
14 |     arbitrary = liftM M.fromList (listOf1 arbitrary)
15 |
16 | instance Arbitrary a => Arbitrary (File a) where
17 |     arbitrary = liftM2 File arbitrary arbitrary
18 |
19 | instance Arbitrary a => Arbitrary (TestDisk a) where
20 |     arbitrary = do
21 |         b <- arbitrary :: Arbitrary b => Gen (M.Map FilePath (File b))
22 |         let list = M.toList b
23 |         len <- choose (1,length list)
24 |         subset <- takeSubset len list
25 |         size <- arbitrary :: Gen Int
26 |         op <- openFiles (map fst subset) M.empty
27 |         return $ TestDisk b b size op
28 |         where
29 |             takeSubset :: Int -> [a] -> Gen [a]
30 |             takeSubset 0 list = return list
31 |             takeSubset n list = do
32 |                 t <- oneof $ map return list
33 |                 rest <- takeSubset (n-1) list
34 |                 return (t:rest)
35 |             openFiles :: [FilePath] -> M.Map FilePath Mode -> Gen (M.Map FilePath Mode)
36 |             openFiles [] f = return f
37 |             openFiles (x:xs) f = do
38 |                 md <- arbitrary :: Gen Mode
39 |                 openFiles xs (M.insert x md f)
40 |
41 |
42 | -- closing a file twice should be the same as closing it once
43 | prop_double_close :: TestDisk String -> String -> Bool
44 | prop_double_close t s = (closeFile (closeFile t s) s) == closeFile t s
45 |
46 | -- writing and then reading the same block should return the same value
47 | prop_readwrite :: TestDisk String -> String -> Int -> String -> Bool
48 | prop_readwrite t fp bn val = readB (writeB (openFile t fp ReadWrite) fp bn val) fp bn buffers == Just val
49 |
50 | -- after syncing, the block should be on disk
51 | prop_sync :: TestDisk String -> String -> Int -> String -> Bool
52 | prop_sync t fp bn val = readB (flushBuffer (writeB (openFile t fp ReadWrite) fp bn val) fp) fp bn disk == Just val
53 |
54 | -- closing a file automatically syncs
55 | prop_close :: TestDisk String -> String -> Int -> String -> Bool
56 | prop_close t fp bn val = readB (closeFile (writeB (openFile t fp ReadWrite) fp bn val) fp) fp bn disk == Just val
57 |
58 | main = defaultMain tests
59 | tests = [
60 |     testGroup "Sorting Group 1" [
61 |         testProperty "double close " prop_double_close,
62 |         testProperty "read and write " prop_readwrite,
63 |         testProperty "sync" prop_sync,
64 |         testProperty "close" prop_close
65 |     ]]
66 |
--------------------------------------------------------------------------------
/src/System/HaskDB/Backend.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE FunctionalDependencies #-}
2 | {-# LANGUAGE FlexibleInstances #-}
3 | {-# LANGUAGE FlexibleContexts #-}
4 | {-# LANGUAGE MultiParamTypeClasses #-}
5 | {-# LANGUAGE TypeFamilies #-}
6 | module System.HaskDB.Backend where
7 | import qualified Data.Map as M
8 | import Control.Monad.State
9 | import Control.Applicative
10 | import Data.Maybe
11 |
12 |
13 | class (Show a,Monad m) => Backend b m a | b -> m where
14 |     data Handle :: * -> *
15 |     type BlockNumber :: * -- I want BlockNumber to have constraint Eq
16 |     open :: FilePath -> Mode -> m (Handle b)
17 |     close :: Handle b -> m ()
18 |     readBlock :: (Eq BlockNumber) => Handle b -> BlockNumber -> m a
19 |     writeBlock :: (Eq BlockNumber) => Handle b -> BlockNumber -> a -> m ()
20 |     sync :: Handle b -> m ()
21 |
22 |
23 |
24 |
25 | instance Show a => Backend (TestDisk a) (State (TestDisk a)) a where
26 |     data Handle (TestDisk a) = Handle FilePath
27 |     type BlockNumber = Int
28 |     open fp m = do
29 |         disk <- get
30 |         put $ openFile disk fp m
31 |         return $ Handle fp
32 |     close (Handle fp) = do
33 |         disk <- get
34 |         put $ closeFile disk fp
35 |     readBlock (Handle fp) bn = do
36 |         t <- get
37 |         case readB t fp bn buffers of
38 |             Nothing -> error "Block Not Found"
39 |             Just a -> return a
40 |     writeBlock (Handle fp) bn a = do
41 |         t <- get
42 |         put $ writeB t fp bn a
43 |     sync (Handle fp) = do
44 |         t <- get
45 |         put $ flushBuffer t fp
46 |
47 | -- Pure functions to simulate a file system. This implementation only supports one handle per file.
48 | -- Pessimistic implementation, so data is not written to the simulated disk until sync is called.
49 | data TestDisk a = TestDisk {
50 |     disk :: M.Map FilePath (File a)
51 |     , buffers :: M.Map FilePath (File a)
52 |     , bufferSize :: Int -- Current buffer Size
53 |     , openFiles :: M.Map FilePath Mode
54 |     } deriving (Eq,Show)
55 |
56 | data File a = File {
57 |     blocks :: M.Map Int a
58 |     , size :: Int
59 |     } deriving (Eq, Show)
60 |
61 | data Mode = ReadWrite | Read | Write deriving (Eq,Show)
62 |
63 | -- Opens the file. If the file is not present then creates it. Also creates the buffer space.
64 | openFile :: TestDisk a -> FilePath -> Mode -> TestDisk a
65 | openFile t fp md = let
66 |     d = if M.member fp (disk t)
67 |         then t
68 |         else t { disk = M.insert fp (File M.empty 0) (disk t)
69 |                , buffers = M.insert fp (File M.empty 0) (buffers t)}
70 |     opfs = openFiles d
71 |     newOpfs = if M.member fp opfs then opfs else M.insert fp md opfs
72 |     in d {openFiles = newOpfs}
73 |
74 | -- Flushes the buffer and closes the handle.
75 | closeFile :: TestDisk a -> FilePath -> TestDisk a
76 | closeFile t fp = let opfs = openFiles t
77 |                      d = if M.member fp (disk t) && M.member fp (buffers t) then flushBuffer t fp else t
78 |                      newOpfs = M.delete fp opfs
79 |                  in d {openFiles = newOpfs , buffers = M.delete fp (buffers t)}
80 |
81 | -- Writes the buffers of the given handle to the disk
82 | flushBuffer :: TestDisk a -> FilePath -> TestDisk a
83 | flushBuffer t fp = let file = M.findWithDefault undefined fp (disk t) -- Sure that undefined will never be called
84 |                        fsize = size file
85 |                        buffer = M.findWithDefault undefined fp (buffers t)
86 |                        bsize = size buffer
87 |                        blks = M.union (blocks buffer) (blocks file)
88 |                    in t { disk = M.insert fp (File blks (fsize + bsize)) (disk t)
89 |                         , buffers = M.insert fp (File M.empty 0) (buffers t)}
90 |
91 |
92 | isClosed :: TestDisk a -> FilePath -> Bool
93 | isClosed t fp = not $ M.member fp (openFiles t) -- a file is closed when it has no entry in openFiles
94 |
95 | -- Reads from the buffers and if not present then reads from the disk
96 | readB :: TestDisk a -> FilePath -> Int -> (TestDisk a -> M.Map FilePath (File a)) -> Maybe a
97 | readB t fp bn from = case join (M.lookup bn . blocks <$> M.lookup fp (from t)) of
98 |     Nothing -> join (M.lookup bn . blocks <$> M.lookup fp (disk t)) -- fall through to the disk map; a plain lookup here gives Nothing for a missing block instead of recursing forever
99 |     Just a -> Just a
100 |
101 |
102 | -- Writes to the buffers
103 | writeB :: TestDisk a -> FilePath -> Int -> a -> TestDisk a
104 | writeB t fp bn v = let file = (\(File b s) -> File (M.insert bn v b) s) <$> M.lookup fp (buffers t)
105 |                    in maybe t (\a -> t {buffers = M.insert fp a (buffers t) , bufferSize = bufferSize t + (sizeOf a)}) file
106 |
107 | sizeOf _ = 1
108 |
109 |
110 |
111 |
112 |
113 |
114 |
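-- Hypothetical walk-through (not part of the original module), mirroring
-- prop_sync from Backend_test.hs: a write first lands in the per-file
-- buffer and appears in the `disk` map only after flushBuffer.
demoSync :: (Maybe String, Maybe String)
demoSync =
    let d0 = openFile (TestDisk M.empty M.empty 0 M.empty) "f" ReadWrite
        d1 = writeB d0 "f" 0 "hello"
    in  ( readB d1 "f" 0 disk                      -- Nothing: still buffered
        , readB (flushBuffer d1 "f") "f" 0 disk )  -- Just "hello": on disk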
--------------------------------------------------------------------------------
/src/System/HaskDB/FileHandling.lhs:
--------------------------------------------------------------------------------
1 | > module System.HaskDB.FileHandling where
2 | >
3 | > import System.IO
4 | > import GHC.Word
5 | > import qualified Data.ByteString as BS
6 | > import qualified Data.ByteString.Char8 as BSC
7 | > import Control.Concurrent
8 | > import Control.Applicative
9 | > import System.HaskDB.Fsync
10 | > import Data.IORef
11 | > -- | New file handle, with the block size in bytes stored in the handle.
12 | > -- synchVar is used to provide atomicity to read and write operations.
13 | > data FHandle = FHandle {
14 | >     fileVersion :: IORef (Integer) ,
15 | >     journalId :: IORef (Integer) ,
16 | >     filePath :: FilePath ,
17 | >     synchVar :: MVar () ,
18 | >     blockSize :: Int ,
19 | >     handle :: Handle
20 | >     }
21 | >
22 | > -- | Opens the file given path, mode and block size, and returns the file handle
23 | > openF :: FilePath -> IOMode -> Int -> IO FHandle
24 | > openF fp m bs = do
25 | >     {-print ("opening handle" ++ fp)-}
26 | >     p <- openBinaryFile fp m
27 | >     sync <- newMVar ()
28 | >     ver <- newIORef 0
29 | >     jid <- newIORef 0
30 | >     return $ FHandle ver jid fp sync bs p
31 | >
32 | >
33 | > -- | Closes the file handle
34 | > closeF :: FHandle -> IO ()
35 | > closeF fh = do
36 | >     {-print ("closing handle" ++ (filePath fh))-}
37 | >     hClose $ handle fh
38 | >
39 | > -- | Given the file handle and block number, reads the block and returns it
40 | > readBlock :: FHandle -> Integer -> IO BS.ByteString
41 | > readBlock fh i = do
42 | >     isCl <- hIsClosed (handle fh)
43 | >     if isCl
44 | >         then
45 | >             print ("Handle is Closed" ++ (filePath fh))
46 | >         else return ()
47 | >     _ <- takeMVar (synchVar fh)
48 | >     hSeek (handle fh) AbsoluteSeek $ (toInteger $ blockSize fh)*i
49 | >     ret <- fst <$> BS.foldr (\a (ls,b) -> if (a /= 000) then (BS.cons a ls,True) else ( if b then (BS.cons a ls,b) else (ls,b)) ) (BS.empty,False) <$> BS.hGet (handle fh) (blockSize fh) -- filters out trailing \NUL characters
50 | >     putMVar (synchVar fh) ()
51 | >     return ret
52 | >
53 | > -- | Given the file handle, block number, and data to be written as a ByteString, writes the given block. Pads with \NUL if the data is less than the block size.
54 | > writeBlock :: FHandle -> Integer -> BS.ByteString -> IO ()
55 | > writeBlock fh i bs = do
56 | >     _ <- takeMVar (synchVar fh)
57 | >     currentPos <- hTell (handle fh)
58 | >     hSeek (handle fh) AbsoluteSeek $ (toInteger $ blockSize fh)*i
59 | >     BS.hPut (handle fh) (BS.take (blockSize fh) (BS.append bs (BS.pack (take (blockSize fh) $ cycle [000 :: GHC.Word.Word8] ))))
60 | >     hSeek (handle fh) AbsoluteSeek currentPos -- Necessary because concurrent use of appendBlock and writeBlock was resulting in overwriting of the block next to where writeBlock was called with appendBlock.
61 | >     putMVar (synchVar fh) ()
62 | >
63 | > -- | Writes all the data. Note that writeBlock truncates data if its size is more than the given block. This will delete all the previous data present in the file.
64 | > writeAll :: FHandle -> BS.ByteString -> IO ()
65 | > writeAll fh bs = do
66 | >     hSeek (handle fh) AbsoluteSeek 0
67 | >     BS.hPut (handle fh) bs
68 | >     size <- hTell (handle fh)
69 | >     hSetFileSize (handle fh) size
70 | >
71 | > readAll :: FHandle -> IO BS.ByteString
72 | > readAll fh = do
73 | >     hSeek (handle fh) AbsoluteSeek 0
74 | >     BS.hGetContents (handle fh)
75 | >
76 | > -- | Appends a block at the end of the file
77 | > appendBlock :: FHandle -> BS.ByteString -> IO Integer
78 | > appendBlock fh bs = do
79 | >     hSeek (handle fh) SeekFromEnd 0
80 | >     currentPos <- hTell (handle fh)
81 | >     BS.hPut (handle fh) (BS.take (blockSize fh) (BS.append bs (BS.pack (take (blockSize fh) $ cycle [000 :: GHC.Word.Word8] ))))
82 | >     return.floor $ (fromIntegral currentPos) / (fromIntegral $ blockSize fh)
83 | >
84 | > -- | Reads the last block of the file and removes it from the file
85 | > getLastBlock :: FHandle -> IO (Maybe BS.ByteString)
86 | > getLastBlock fh = do
87 | >     fs <- hFileSize (handle fh)
88 | >     if fs > 0
89 | >         then do
90 | >             hSeek (handle fh) SeekFromEnd (-(fromIntegral $ blockSize fh))
91 | >             bs <- BS.hGet (handle fh) (blockSize fh)
92 | >             {-bs <- BS.hGet (handle fh) 32-}
93 | >             hSetFileSize (handle fh) (fs - (fromIntegral $ blockSize fh))
94 | >             return $ Just bs
95 | >         else
96 | >             return Nothing
97 | >
98 | >
99 | >
100 | >
101 | > -- | Flushes the buffer to the hard disk
102 | > flushBuffer :: FHandle -> IO ()
103 | > flushBuffer fh = sync $ handle fh
104 | >
105 | > -- | Zeroes out the file.
106 | > truncateF :: FHandle -> IO ()
107 | > truncateF fh = hSetFileSize (handle fh) 0
108 | >
109 | > test = do
110 | >     c <- openF "abc.b" WriteMode 1024 -- Truncates to zero length file
111 | >     closeF c
112 | >     p <- openF "abc.b" ReadWriteMode 1024
113 | >     forkIO $ do
114 | >         sequence_ $ map (\s -> appendBlock p (BSC.pack (show s))) [1..100]
115 | >     forkIO $ do
116 | >         sequence_ $ map (\s -> appendBlock p (BSC.pack (show s))) [101..200]
117 | >     appendBlock p (BSC.pack "Hello How are you" )
118 | >     writeBlock p 0 (BSC.pack "First Block")
119 | >     bs <- appendBlock p (BSC.pack "check")
120 | >     print bs
121 | >     threadDelay 1000 -- To keep the thread blocked and not close the handle before the data is written.
122 | >     flushBuffer p
123 | >     closeF p
124 | >     p <- openF "abc.b" ReadMode 1024
125 | >     x <- sequence $ map (readBlock p) [0..500]
126 | >     print x
127 | >     closeF p
128 |
129 |
130 |
--------------------------------------------------------------------------------
/src/System/HaskDB/TransactionFH.lhs:
--------------------------------------------------------------------------------
1 | > module System.HaskDB.TransactionFH where
2 | >
3 | > import qualified Data.ByteString as BS
4 | > import qualified System.HaskDB.FileHandling as FH
5 | > import qualified System.HaskDB.FileHeader as FHD
6 | > import qualified Data.Dequeue as DQ
7 | > import Data.Maybe
8 | > import Data.Dequeue
9 | > import Data.BloomFilter.Easy
10 | > import Data.IORef
11 | > import qualified Data.BloomFilter as BF
12 | > import System.HaskDB.Journal as JU
13 | >
14 | > import Data.Unique
15 | > import Data.Word
16 | > import Data.Serialize
17 | > import Control.Concurrent
18 | > import Control.Applicative
19 | > import System.IO
20 | >
21 | > data BlockData = BlockData BS.ByteString
22 | > type BlockNumber = Integer
23 | > data LogDescriptor = LogDescriptor
24 | > type FileInformation = FH.FHandle
25 | > type FileVersion = Integer
26 | >
27 | > readBlock = FH.readBlock
28 | > writeBlock = FH.writeBlock
29 | >
30 | > data Transaction = Transaction {
31 | >     rBlocks :: BlockList
32 | >     , tType :: TransactionType
33 | >     }
34 | >
35 | > data TransactionType = ReadOnly | ReadWrite {
36 | >     bloom :: BF.Bloom BlockNumber
37 | >     , journal :: Journal
38 | >     }
39 | >
40 | >
41 | > type JBloom = BF.Bloom BlockNumber
42 | > data JInfo = JInfo
43 | >     { getJournal :: JU.Journal
44 | >     , getBloomFilter :: JBloom
45 | >     , getfv :: Integer
46 | >     }
47 | >
48 | > -- TODO : A heap implementation of the transactions queue would be much better, as the only operations to support are
49 | > -- getMinFileVersion, inserting a transaction, and deleting a transaction.
50 | > data TFile = TFile {
51 | >     fHandle :: FH.FHandle -- handle of the database file
52 | >     , commitSynch :: MVar () -- held while a transaction commits, so commits are serialized
53 | >     , jQueue :: IORef (DQ.BankersDequeue JInfo) -- info about the valid journals on disk
54 | >     , failedQueue :: IORef (DQ.BankersDequeue (Integer,FileVersion)) -- (id, version) of failed transactions
55 | >     , commitedQueue :: IORef (DQ.BankersDequeue JInfo)
56 | >     , transactions :: IORef (DQ.BankersDequeue (Integer,FileVersion)) -- (id, start version) of running transactions
57 | >     }
58 | >
59 | > openTF fpath = do
60 | >     handle <- FH.openF fpath ReadWriteMode 1024
61 | >     cmVar <- newMVar ()
62 | >     jQ <- newIORef DQ.empty
63 | >     fQ <- newIORef DQ.empty
64 | >     cQ <- newIORef DQ.empty -- TFile has six fields, so the commited queue needs its own IORef too
65 | >     tQ <- newIORef DQ.empty
66 | >     return $ TFile handle cmVar jQ fQ cQ tQ
67 | > closeTF tFile = do
68 | >     FH.closeF $ fHandle tFile
69 | >     q <- readIORef $ jQueue tFile
70 | >     closeAll q
71 | >     where
72 | >         closeAll q = do
73 | >             let (a,b) = popFront q
74 | >             case a of
75 | >                 Nothing -> return ()
76 | >                 Just a -> do JU.closeJournal $ getJournal a
77 | >                              closeAll b
78 |
79 |
80 |
81 |
82 |
83 | > checkInBloomFilter bf bn = elemB bn bf
84 | >
85 | > readBlockFromOwnJournal :: TFile -> BlockNumber -> Transaction -> IO (Maybe BS.ByteString)
86 | > readBlockFromOwnJournal tf bn d = do
87 | >     case (tType d) of
88 | >         ReadOnly -> return Nothing
89 | >         ReadWrite _ j -> JU.readFromJournal j bn
90 | >
91 | > -- | First, if the transaction has its own journal, then it should read from that;
92 | > -- else read the block from the most recent journal that contains it;
93 | > -- else read it from the database
94 | >
95 | >
96 | > readBlockJ :: TFile -> BlockNumber -> Transaction -> IO BS.ByteString
97 | > readBlockJ tf bn d = do
98 | >     r <- readBlockFromOwnJournal tf bn d
99 | >     case r of
100 | >         Just x -> do
101 | >             return x
102 | >         Nothing -> do
103 | >             q <- readIORef $ jQueue tf
104 | >             if DQ.null q then FH.readBlock (fHandle tf) bn else func q bn -- no journals: read straight from the database file
105 | >     where
106 | >         func q bn = case popFront q of
107 | >             (Just jInfo, q') ->
108 | >                 case checkInBloomFilter (getBloomFilter jInfo) bn of
109 | >                     False -> func q' bn
110 | >                     True -> do
111 | >                         d <- JU.readFromJournal (getJournal jInfo) bn
112 | >                         case d of
113 | >                             Just x -> return x
114 | >                             Nothing -> func q' bn
115 | >             (Nothing , _) -> do
116 | >                 FH.readBlock (fHandle tf) bn -- read from database file
117 | >
118 | > -- | Returns True if there is a block from bli which is probably in the list of bloom filters
119 | > checkF :: [JBloom] -> BlockList -> IO Bool
120 | > {-checkF jbl bli = any id [elemB bn jb | jb <- jbl , bn <- bli]-}
121 | > checkF [] bli = return False
122 | > checkF js bli = do
123 | >     --print $ checkOneBlock js 0
124 | >     (f,rest) <- getBlock bli
125 | >     print f
126 | >     --print $ "Bloom List " ++ (show js)
127 | >     --print $ "Block List " ++ (show (blocks bli))
128 | >     case f of
129 | >         Just x -> if checkOneBlock js (toInteger x) then return True else checkF js rest
130 | >         Nothing -> return False
131 | >     where
132 | >         checkOneBlock [] bl = False
133 | >         checkOneBlock (f:fs) bl = if elemB bl f then True else checkOneBlock fs bl
134 | >
135 | >
136 | > getJInfoList :: JId -> JId -> DQ.BankersDequeue JInfo -> [JInfo]
137 | > getJInfoList id1 id2 q = takeWhile (\q -> (getfv q) /= id1) $ takeBack (Data.Dequeue.length q) q
138 | >
139 | > -- | checkFailure returns True when the transaction has to fail.
140 | > -- The transaction will fail only in the following cases : -
141 | > -- 1. The set of readBlocks of the current transaction and writeBlocks of the failed transaction is not disjoint
142 | > -- 2.
The set of the readBlocks of current transaction and the blocks changed in between old fileversion and new fileversion is not disjoint 143 | > -- TODO:Union of BloomFilter 144 | > checkSuccess :: FileVersion -> FileVersion -> TFile -> BlockList -> IO Bool 145 | > checkSuccess oldfv newfv tf bli = not <$> checkFailure oldfv newfv tf bli 146 | > checkFailure :: FileVersion 147 | > -> FileVersion 148 | > -> TFile 149 | > -> BlockList 150 | > -> IO Bool 151 | > checkFailure oldfv newfv tf bli = do 152 | > {-fq <- readIORef (failedQueue tf)-} 153 | > {-let fli = map snd (takeFront (Data.Dequeue.length fq) fq)-} 154 | > {-case checkF fli bli of-} 155 | > {-True -> return True-} 156 | > {-False -> do-} 157 | > q <- readIORef (jQueue tf) 158 | > let jli = getJInfoList oldfv newfv q 159 | > let jbl = map (getBloomFilter) jli 160 | > print $ map getfv jli 161 | > f <- checkF jbl bli 162 | > print f 163 | > return f 164 | > 165 | > -- | Interface to maintain list of read Blocks 166 | > -- Only that much data is kept in the memory which can be fit in a file 167 | > -- Rest is written on to the hard disk . 168 | > 169 | > -- Size of empty list is 8 bytes 170 | > data BlockList = BlockList { 171 | > blocks :: [Word64] 172 | > , size :: Int 173 | > , transH :: FH.FHandle 174 | > } 175 | > 176 | > addBlock :: Word64 -> BlockList -> IO BlockList 177 | > addBlock b bl = do 178 | > let s = size bl 179 | > let blkSize = FH.blockSize $ transH bl 180 | > let blks = blocks bl 181 | > if s + 8 > blkSize 182 | > then do 183 | > FH.appendBlock (transH bl) $ encode blks 184 | > return $ BlockList [b] 16 (transH bl) 185 | > else 186 | > return $ BlockList (b:blks) (s+8) (transH bl) 187 | > 188 | > getBlock :: BlockList -> IO (Maybe Word64,BlockList) 189 | > getBlock bl = do 190 | > let s = size bl 191 | > let blkSize = FH.blockSize $ transH bl 192 | > let blks = blocks bl 193 | > if s == 8 194 | > then do 195 | > bs <- FH.getLastBlock $ transH bl 196 | > case bs of 197 | > Just bs -> do 198 | > let ls = decode bs 199 | > case ls of 200 | > Right (x:xs) -> return (Just x,BlockList xs ((1 + Prelude.length xs)*8) (transH bl)) 201 | > Left err -> return (Nothing , BlockList [] 8 (transH bl)) 202 | > Nothing -> return (Nothing , BlockList [] 8 (transH bl)) 203 | > else do 204 | > let (b:bs) = blks 205 | > return (Just b , BlockList bs (s-8) (transH bl)) 206 | 207 | 208 | -------------------------------------------------------------------------------- /src/System/HaskDB/Journal.lhs: -------------------------------------------------------------------------------- 1 | > module System.HaskDB.Journal where 2 | > 3 | > import System.IO 4 | > import System.Directory 5 | > import System.Random 6 | > import Control.Applicative 7 | > import Data.Map as Map 8 | > import Data.Serialize 9 | > import Data.Text 10 | > import Data.Maybe 11 | > import Data.ByteString as BS 12 | > import qualified Data.ByteString.Char8 as BSC 13 | > import qualified System.HaskDB.FileHandling as FH 14 | > import System.HaskDB.FileHeader 15 | > import Data.IORef 16 | 17 | Whenever a block is to be written to the main Transaction File , we can essentially follow one of the following two approaches: - 18 | 19 | * Copy the original block to the Journal and try writing the changed block to the main file. If the transaction goes through successfully , we can discard the Journal . This approach is simpler to implement and we started with this approach in the beginning. But we came to realize that this would be highly inefficient . 
20 | * Our current approach is based on an idea similar to Write Ahead Logging in Sqlite. Instead of writing the "old data" to journal , we write the new data first to the Journal and then eventually move it to the main file. But this approach means the main file alone is no more the only place to read data from. The main file with all the journals together really give us the latest picture of our data. 21 | 22 | A Journal is a temporary file defined per transaction.It is used to keep track of the blocks changed by a transaction. It stores the new data that needs to be eventually written to the main database. 23 | 24 | Following are some trivial types for journal 25 | 26 | > type JHandle = FH.FHandle 27 | > type OldBlock = Integer 28 | > type JId = Integer 29 | 30 | A journal object requires a unique identifier , file handles of the header 31 | file , journal file and the database file and an index storing a mapping of block number in the 32 | database to block number in journal file. 33 | 34 | > data Journal = Journal { journalID :: JId -- filename prefix of Journal 35 | > , hHandle :: FH.FHandle -- Handle for the header file 36 | > , jHandle :: FH.FHandle -- Handle for the Journal file 37 | > , dHandle :: FH.FHandle -- Handle for the database 38 | > , oldBlocks :: Map Integer Integer 39 | > -- Map from block number in database to block number 40 | > -- of the journal 41 | > } 42 | 43 | Closing the Journal object requires closing the handle of both the header file 44 | as well as the journal file. 45 | 46 | > closeJournal j = do 47 | > FH.closeF $ jHandle j 48 | > FH.closeF $ hHandle j 49 | 50 | > -- | Find all the journals present on the disk 51 | > findJournals :: FilePath -> IO [JId] 52 | > findJournals dp = do 53 | > files <- getDirectoryContents dp 54 | > return [ read (Data.Text.unpack id) 55 | > | file <- files 56 | > , id <- maybeToList (stripSuffix (Data.Text.pack ".header") 57 | > (Data.Text.pack file)) 58 | > ] 59 | > -- return a list of JIds 60 | > 61 | > -- | Create a new unique Journal file 62 | > newJournal :: FH.FHandle -> IO Journal 63 | > newJournal dh = do 64 | > gen <- newStdGen 65 | > jid <- atomicModifyIORef (FH.journalId dh) (\a -> (a+1,a)) 66 | > let filename = show jid 67 | > let header = filename ++ ".header" 68 | > let journal = filename ++ ".journal" 69 | > headerHandle <- FH.openF header ReadWriteMode 1024 70 | > journalHandle <- FH.openF journal ReadWriteMode 1024 71 | > let fJournal = Journal { journalID = jid 72 | > , hHandle = headerHandle 73 | > , jHandle = journalHandle 74 | > , dHandle = dh 75 | > , oldBlocks = Map.empty 76 | > } 77 | > return fJournal 78 | > 79 | > -- | Build a Journal from the journal file on disk 80 | > buildJournal :: JId -> FH.FHandle -> IO Journal 81 | > buildJournal id dh = do 82 | > let filename = show id 83 | > let header = filename ++ ".header" 84 | > let journal = filename ++ ".journal" 85 | > headerHandle <- FH.openF header ReadWriteMode 1024 86 | > journalHandle <- FH.openF journal ReadWriteMode 1024 87 | > retreivedMap <- readHeader headerHandle 88 | > let fJournal = Journal { journalID = id 89 | > , hHandle = headerHandle 90 | > , jHandle = journalHandle 91 | > , dHandle = dh 92 | > , oldBlocks = retreivedMap 93 | > } 94 | > return fJournal 95 | > 96 | > 97 | > -- | Reads all the blocks in a Journal 98 | > readAllBlocksFrom :: Journal -> IO [BS.ByteString] 99 | > readAllBlocksFrom j = undefined 100 | > 101 | > -- | Given a block number in the database file , read it from the Journal 102 | > readFromJournal :: Journal -> 
Integer -> IO (Maybe BS.ByteString) 103 | > readFromJournal j bn = do 104 | > let l = Map.lookup bn (oldBlocks j) 105 | > case l of 106 | > Just val -> Just <$> FH.readBlock (jHandle j) val 107 | > Nothing -> return Nothing 108 | > 109 | > -- | Writes the header to the header file 110 | > writeHeader :: Journal -> IO () 111 | > writeHeader j = do 112 | > let s = encode (oldBlocks j) 113 | > FH.writeAll (hHandle j) s 114 | > 115 | > -- | Reads the header information from the header file 116 | > readHeader :: FH.FHandle -> IO (Map Integer Integer) 117 | > readHeader fh = do 118 | > val <- FH.readAll fh 119 | > let (Right m) = decode (val) --Either String a 120 | > return m 121 | > 122 | > -- | Write to a journal given block number and blockData 123 | > writeToJournal :: Journal -> Integer -> BS.ByteString -> IO Journal 124 | > writeToJournal j bn bd = do 125 | > val <- FH.appendBlock (jHandle j) bd 126 | > let newMap = insert bn val (oldBlocks j) 127 | > let fJournal = Journal { journalID = journalID j 128 | > , hHandle = hHandle j 129 | > , jHandle = jHandle j 130 | > , dHandle = dHandle j 131 | > , oldBlocks = newMap 132 | > } 133 | > writeHeader fJournal 134 | > return fJournal 135 | 136 | > -- | To zero out the journal file 137 | > resetJournal :: Journal -> IO () 138 | > resetJournal = FH.truncateF . jHandle 139 | > 140 | > -- | Commit the Journal by writing the new FileVersion to its header 141 | > -- change this later 142 | > commitJournal :: Journal -> IO () 143 | > commitJournal j = changeFileVersion (dHandle j) 144 | > 145 | > 146 | > -- | Replay the data from the Journal to bring back the database into a consistent 147 | > -- state in case of a power failure 148 | > -- Read every block from the journal and write to the database 149 | > replayJournal :: Journal -> IO () 150 | > replayJournal j = do 151 | > let li = toAscList $ oldBlocks j --potential speedup if ascending? 152 | > sequence_ $ Prelude.map readAndWrite li 153 | > where 154 | > readAndWrite :: (Integer,Integer) -> IO() 155 | > readAndWrite (bn,jbn) = do 156 | > maybebd <- readFromJournal j bn 157 | > case maybebd of 158 | > Just bd -> FH.writeBlock (dHandle j) bn bd 159 | > Nothing -> return () 160 | 161 | > test = do 162 | > d <- FH.openF "abc.b" ReadWriteMode 1024 -- Truncates to zero length file 163 | > j <- newJournal d 164 | > k <- writeToJournal j 256 (BSC.pack "Block number 256") 165 | > l <- writeToJournal k 666 (BSC.pack "Block number 666") 166 | > let x = Map.lookup 256 (oldBlocks l) 167 | > let y = Map.lookup 666 (oldBlocks l) 168 | > r1 <- readFromJournal l 256 169 | > r2 <- readFromJournal l 666 170 | > case r1 of 171 | > Just v -> BS.putStrLn v 172 | > _ -> return () 173 | 174 | 175 | -------------------------------------------------------------------------------- /refs/slidy/slidy.css: -------------------------------------------------------------------------------- 1 | /* slidy.css 2 | 3 | Copyright (c) 2005-2010 W3C (MIT, ERCIM, Keio), All Rights Reserved. 
4 | W3C liability, trademark, document use and software licensing 5 | rules apply, see: 6 | 7 | http://www.w3.org/Consortium/Legal/copyright-documents 8 | http://www.w3.org/Consortium/Legal/copyright-software 9 | */ 10 | body 11 | { 12 | margin: 0 0 0 0; 13 | padding: 0 0 0 0; 14 | width: 100%; 15 | height: 100%; 16 | color: black; 17 | background-color: white; 18 | font-family: "URW Palladio L", "Palatino Linotype", sans-serif; 19 | font-size: 14pt; 20 | } 21 | 22 | code 23 | { 24 | font-family: "DejaVu Sans Mono", monospace; 25 | } 26 | 27 | div.toolbar { 28 | position: fixed; z-index: 200; 29 | top: auto; bottom: 0; left: 0; right: 0; 30 | height: 1.2em; text-align: right; 31 | padding-left: 1em; 32 | padding-right: 1em; 33 | font-size: 60%; 34 | color: red; 35 | background-color: rgb(240,240,240); 36 | border-top: solid 1px rgb(180,180,180); 37 | } 38 | 39 | div.toolbar span.copyright { 40 | color: black; 41 | margin-left: 0.5em; 42 | } 43 | 44 | div.initial_prompt { 45 | position: absolute; 46 | z-index: 1000; 47 | bottom: 1.2em; 48 | width: 100%; 49 | background-color: rgb(200,200,200); 50 | opacity: 0.35; 51 | background-color: rgb(200,200,200, 0.35); 52 | cursor: pointer; 53 | } 54 | 55 | div.initial_prompt p.help { 56 | text-align: center; 57 | } 58 | 59 | div.initial_prompt p.close { 60 | text-align: right; 61 | font-style: italic; 62 | } 63 | 64 | div.slidy_toc { 65 | position: absolute; 66 | z-index: 300; 67 | width: 60%; 68 | max-width: 30em; 69 | height: 30em; 70 | overflow: auto; 71 | top: auto; 72 | right: auto; 73 | left: 4em; 74 | bottom: 4em; 75 | padding: 1em; 76 | background: rgb(240,240,240); 77 | border-style: solid; 78 | border-width: 2px; 79 | font-size: 60%; 80 | } 81 | 82 | div.slidy_toc .toc_heading { 83 | text-align: center; 84 | width: 100%; 85 | margin: 0; 86 | margin-bottom: 1em; 87 | border-bottom-style: solid; 88 | border-bottom-color: rgb(180,180,180); 89 | border-bottom-width: 1px; 90 | } 91 | 92 | div.slide { 93 | z-index: 20; 94 | margin: 0 0 0 0; 95 | padding-top: 0; 96 | padding-bottom: 0; 97 | padding-left: 20px; 98 | padding-right: 20px; 99 | border-width: 0; 100 | clear: both; 101 | top: 0; 102 | bottom: 0; 103 | left: 0; 104 | right: 0; 105 | line-height: 120%; 106 | background-color: transparent; 107 | } 108 | 109 | div.slide > div.figure { 110 | text-align: center 111 | } 112 | 113 | div.background { 114 | display: none; 115 | } 116 | 117 | div.handout { 118 | margin-left: 20px; 119 | margin-right: 20px; 120 | } 121 | 122 | div.slide.titlepage { 123 | text-align: center; 124 | } 125 | 126 | div.slide.titlepage h1 { 127 | padding-top: 10%; 128 | margin-right: 0; 129 | } 130 | 131 | div.slide h1 { 132 | padding-left: 0; 133 | padding-right: 20pt; 134 | padding-top: 4pt; 135 | padding-bottom: 4pt; 136 | margin-top: 0; 137 | margin-left: 0; 138 | margin-right: 60pt; 139 | margin-bottom: 0.5em; 140 | display: block; 141 | font-size: 160%; 142 | line-height: 1.2em; 143 | background: transparent; 144 | } 145 | 146 | div.toc { 147 | position: absolute; 148 | top: auto; 149 | bottom: 4em; 150 | left: 4em; 151 | right: auto; 152 | width: 60%; 153 | max-width: 30em; 154 | height: 30em; 155 | border: solid thin black; 156 | padding: 1em; 157 | background: rgb(240,240,240); 158 | color: black; 159 | z-index: 300; 160 | overflow: auto; 161 | display: block; 162 | visibility: visible; 163 | } 164 | 165 | div.toc-heading { 166 | width: 100%; 167 | border-bottom: solid 1px rgb(180,180,180); 168 | margin-bottom: 1em; 169 | text-align: center; 170 | } 171 | 172 | 
pre { 173 | font-size: 80%; 174 | font-weight: bold; 175 | line-height: 120%; 176 | padding-top: 0.2em; 177 | padding-bottom: 0.2em; 178 | padding-left: 1em; 179 | padding-right: 1em; 180 | border-style: solid; 181 | border-left-width: 1em; 182 | border-top-width: thin; 183 | border-right-width: thin; 184 | border-bottom-width: thin; 185 | border-color: #95ABD0; 186 | color: #00428C; 187 | background-color: #E4E5E7; 188 | } 189 | 190 | li pre { margin-left: 0; } 191 | 192 | blockquote { font-style: italic } 193 | 194 | img { background-color: transparent } 195 | 196 | p.copyright { font-size: smaller } 197 | 198 | .center { text-align: center } 199 | .footnote { font-size: smaller; margin-left: 2em; } 200 | 201 | a img { border-width: 0; border-style: none } 202 | 203 | a:visited { color: navy } 204 | a:link { color: navy } 205 | a:hover { color: red; text-decoration: underline } 206 | a:active { color: red; text-decoration: underline } 207 | 208 | a {text-decoration: none} 209 | .navbar a:link {color: white} 210 | .navbar a:visited {color: yellow} 211 | .navbar a:active {color: red} 212 | .navbar a:hover {color: red} 213 | 214 | ul { list-style-type: square; } 215 | ul ul { list-style-type: disc; } 216 | ul ul ul { list-style-type: circle; } 217 | ul ul ul ul { list-style-type: disc; } 218 | li { margin-left: 0.5em; margin-top: 0.5em; font-weight: bold } 219 | li li { font-size: 85%; font-weight: normal } 220 | li li li { font-size: 85%; font-weight: normal } 221 | strong { color: red; } 222 | li li strong { color: black; } 223 | /* pandoc's rules about when to insert paragraphs don't interact well 224 | with the requirement for blank lines around code blocks. Let's just 225 | neutralize the effects of p in bullets. */ 226 | li > p { margin: 0em; } 227 | 228 | div dt 229 | { 230 | margin-left: 0; 231 | margin-top: 1em; 232 | margin-bottom: 0.5em; 233 | font-weight: bold; 234 | } 235 | div dd 236 | { 237 | margin-left: 2em; 238 | margin-bottom: 0.5em; 239 | } 240 | 241 | 242 | p,pre,ul,ol,blockquote,h2,h3,h4,h5,h6,dl,table { 243 | margin-left: 1em; 244 | margin-right: 1em; 245 | } 246 | 247 | p.subhead { font-weight: bold; margin-top: 2em; } 248 | 249 | .smaller { font-size: smaller } 250 | .bigger { font-size: 130% } 251 | 252 | td,th { padding: 0.2em } 253 | 254 | ul { 255 | margin: 0.5em 1.5em 0.5em 1.5em; 256 | padding: 0; 257 | } 258 | 259 | ol { 260 | margin: 0.5em 1.5em 0.5em 1.5em; 261 | padding: 0; 262 | } 263 | 264 | ul { list-style-type: square; } 265 | ul ul { list-style-type: disc; } 266 | ul ul ul { list-style-type: circle; } 267 | ul ul ul ul { list-style-type: disc; } 268 | 269 | ul li { 270 | list-style: square; 271 | margin: 0.1em 0em 0.6em 0; 272 | padding: 0 0 0 0; 273 | line-height: 140%; 274 | } 275 | 276 | ol li { 277 | margin: 0.1em 0em 0.6em 1.5em; 278 | padding: 0 0 0 0px; 279 | line-height: 140%; 280 | list-style-type: decimal; 281 | } 282 | 283 | li ul li { 284 | font-size: 85%; 285 | font-style: normal; 286 | list-style-type: disc; 287 | background: transparent; 288 | padding: 0 0 0 0; 289 | } 290 | li li ul li { 291 | font-size: 85%; 292 | font-style: normal; 293 | list-style-type: circle; 294 | background: transparent; 295 | padding: 0 0 0 0; 296 | } 297 | li li li ul li { 298 | list-style-type: disc; 299 | background: transparent; 300 | padding: 0 0 0 0; 301 | } 302 | 303 | li ol li { 304 | list-style-type: decimal; 305 | } 306 | 307 | 308 | li li ol li { 309 | list-style-type: decimal; 310 | } 311 | 312 | /* 313 | setting class="outline on ol or ul makes it 
behave as an 314 | ouline list where blocklevel content in li elements is 315 | hidden by default and can be expanded or collapsed with 316 | mouse click. Set class="expand" on li to override default 317 | */ 318 | 319 | ol.outline li:hover { cursor: pointer } 320 | ol.outline li.nofold:hover { cursor: default } 321 | 322 | ul.outline li:hover { cursor: pointer } 323 | ul.outline li.nofold:hover { cursor: default } 324 | 325 | ol.outline { list-style:decimal; } 326 | ol.outline ol { list-style-type:lower-alpha } 327 | 328 | ol.outline li.nofold { 329 | padding: 0 0 0 20px; 330 | background: transparent url(../graphics/nofold-dim.gif) no-repeat 0px 0.5em; 331 | } 332 | ol.outline li.unfolded { 333 | padding: 0 0 0 20px; 334 | background: transparent url(../graphics/fold-dim.gif) no-repeat 0px 0.5em; 335 | } 336 | ol.outline li.folded { 337 | padding: 0 0 0 20px; 338 | background: transparent url(../graphics/unfold-dim.gif) no-repeat 0px 0.5em; 339 | } 340 | ol.outline li.unfolded:hover { 341 | padding: 0 0 0 20px; 342 | background: transparent url(../graphics/fold.gif) no-repeat 0px 0.5em; 343 | } 344 | ol.outline li.folded:hover { 345 | padding: 0 0 0 20px; 346 | background: transparent url(../graphics/unfold.gif) no-repeat 0px 0.5em; 347 | } 348 | 349 | ul.outline li.nofold { 350 | padding: 0 0 0 20px; 351 | background: transparent url(../graphics/nofold-dim.gif) no-repeat 0px 0.5em; 352 | } 353 | ul.outline li.unfolded { 354 | padding: 0 0 0 20px; 355 | background: transparent url(../graphics/fold-dim.gif) no-repeat 0px 0.5em; 356 | } 357 | ul.outline li.folded { 358 | padding: 0 0 0 20px; 359 | background: transparent url(../graphics/unfold-dim.gif) no-repeat 0px 0.5em; 360 | } 361 | ul.outline li.unfolded:hover { 362 | padding: 0 0 0 20px; 363 | background: transparent url(../graphics/fold.gif) no-repeat 0px 0.5em; 364 | } 365 | ul.outline li.folded:hover { 366 | padding: 0 0 0 20px; 367 | background: transparent url(../graphics/unfold.gif) no-repeat 0px 0.5em; 368 | } 369 | 370 | /* for slides with class "title" in table of contents */ 371 | a.titleslide { font-weight: bold; font-style: italic } 372 | 373 | /* 374 | hide images for work around for save as bug 375 | where browsers fail to save images used by CSS 376 | */ 377 | img.hidden { display: none; visibility: hidden } 378 | div.initial_prompt { display: none; visibility: hidden } 379 | 380 | div.slide { 381 | visibility: visible; 382 | position: inherit; 383 | } 384 | div.handout { 385 | border-top-style: solid; 386 | border-top-width: thin; 387 | border-top-color: black; 388 | } 389 | 390 | @media screen { 391 | .hidden { display: none; visibility: visible } 392 | 393 | div.slide.hidden { display: block; visibility: visible } 394 | div.handout.hidden { display: block; visibility: visible } 395 | div.background { display: none; visibility: hidden } 396 | body.single_slide div.initial_prompt { display: block; visibility: visible } 397 | body.single_slide div.background { display: block; visibility: visible } 398 | body.single_slide div.background.hidden { display: none; visibility: hidden } 399 | body.single_slide .invisible { visibility: hidden } 400 | body.single_slide .hidden { display: none; visibility: hidden } 401 | body.single_slide div.slide { position: absolute } 402 | body.single_slide div.handout { display: none; visibility: hidden } 403 | } 404 | 405 | @media print { 406 | .hidden { display: block; visibility: visible } 407 | 408 | div.slide pre { font-size: 60%; padding-left: 0.5em; } 409 | div.toolbar { display: 
none; visibility: hidden; } 410 | div.slidy_toc { display: none; visibility: hidden; } 411 | div.background { display: none; visibility: hidden; } 412 | div.slide { page-break-before: always } 413 | /* :first-child isn't reliable for print media */ 414 | div.slide.first-slide { page-break-before: avoid } 415 | } 416 | 417 | -------------------------------------------------------------------------------- /src/System/HaskDB/Transactions.lhs: -------------------------------------------------------------------------------- 1 | Transactions.lhs 2 | ================================================ 3 | 4 | > {-# LANGUAGE NoMonomorphismRestriction #-} 5 | > -- | File Transactions Module . 6 | > module System.HaskDB.Transactions ( 7 | > runTransaction 8 | > , retryTransaction 9 | > , readBlockT 10 | > , writeBlockT 11 | > , openTF 12 | > , closeTF 13 | > , sequencer 14 | > ) where 15 | > import Control.Concurrent 16 | > import qualified Data.ByteString as BS 17 | > import qualified System.HaskDB.FileHandling as FH 18 | > import System.HaskDB.FileHeader 19 | > import Data.Maybe 20 | > import System.IO hiding (withFile) 21 | > import System.HaskDB.Journal 22 | > import System.HaskDB.TransactionFH hiding(checkFailure) 23 | > import qualified Data.BloomFilter as BF 24 | > import qualified Data.Dequeue as DQ 25 | > import Data.BloomFilter.Hash (cheapHashes) 26 | > import Data.IORef 27 | > import Data.Unique 28 | > import Control.Applicative 29 | > import Control.Exception 30 | 31 | The main task is to capture the transaction into a datatype first. Here we are implementing a very basic version of transactions, so our transaction system provides only two types of operations to be performed on the file. 32 | 33 | * **ReadBlock** is to read the data from the given block number. 34 | * **WriteBlock** is to write the given data to the block number provided. 35 | 36 | We could also think of adding operations like append block, modify block, etc., but to keep it simple we only support these two basic operations. 37 | 38 | Now let's look at the data definition of the File Transaction (FT) data type. 39 | 40 | 41 | > -- | Transaction DataType 42 | > data FT a = 43 | > Done a | -- ^ Any value in the FT monad will be captured in Done a. 44 | > ReadBlock BlockNumber (BS.ByteString -> FT a) | -- ^ FT a here represents 45 | > -- rest of the computation. This follows from the continuation passing style. 46 | > WriteBlock BlockNumber BS.ByteString (FT a) -- ^ FT a here is similar to 47 | > -- above to have continuations. 48 | 49 | Don't be scared by the types of ReadBlock and WriteBlock. We will see later how they help in actually passing the continuations. 50 | 51 | Let's see the monad definition of the FT datatype. 52 | 53 | > -- | Monad Definition for the Transaction. 54 | > instance Monad FT where 55 | > return = Done 56 | > m >>= f = case m of 57 | > Done a -> f a 58 | > ReadBlock bn c -> ReadBlock bn (\i -> c i >>= f) 59 | > WriteBlock bn x c -> WriteBlock bn x (c >>= f) 60 | 61 | Here we will see how the types of ReadBlock and WriteBlock actually help in our continuation passing style of programming. Let's see a simple example of the interface our implementation provides. 62 | 63 | Consider the famous banking example for transactions. We want to transfer an amount x from account A to account B. 64 | 65 | Let's assume that the fund information for A and B is stored in the same file at block numbers a and b respectively. 66 | 67 | Here is a function which deposits amount x to the given account.
68 | 69 | > deposit a x = do 70 | > block <- ReadBlock a return 71 | > WriteBlock a (increase block x) (return ()) 72 | > where 73 | > increase bs x = undefined -- amount bs = amount bs + x 74 | 75 | Let's see how this is translated to explicit notation without do notation. 76 | 77 | > -- ReadBlock a return >>= (\ block -> WriteBlock a (increase block x) (return ())) 78 | > -- ReadBlock a (\ i -> Done i >>= 79 | > --     (\ block -> WriteBlock a (increase block x) (return ()))) 80 | > -- ReadBlock a (\ block -> WriteBlock a (increase block x) (return ())) 81 | 82 | It got transformed to exactly what we wanted. 83 | 84 | It was a little frustrating to write return while writing ReadBlock and WriteBlock, so let's define a few helpers to avoid the repetition. 85 | 86 | > -- | readBlockT to be used inside the FT Monad 87 | > readBlockT :: BlockNumber -> FT BS.ByteString 88 | > readBlockT = flip ReadBlock return 89 | > -- | writeBlockT to be used inside the FT Monad 90 | > writeBlockT :: BlockNumber -> BS.ByteString -> FT () 91 | > writeBlockT v x = WriteBlock v x $ return () 92 | 93 | Now we want to actually perform the transactions satisfying all the ACID guarantees. So we need to write a function to convert our transactions from the FT monad to the IO monad and perform them. 94 | 95 | According to the semantics of a transaction, a transaction can either fail or succeed. So we should provide at least two types of functions to run a transaction, which are as follows: 96 | 97 | > -- | Runs the given transaction on the file. 98 | > -- Transaction may fail in which case it returns Nothing. 99 | > runTransaction :: FT a -- ^ FileTransaction to be performed 100 | > -> TFile -- ^ File on which this transaction is to be performed 101 | > -> IO (Maybe a) 102 | > runTransaction = runT Nothing False 103 | > 104 | > -- | Runs the transaction on the file. 105 | > -- If transaction fails then repeats it with higher priority. 106 | > retryTransaction :: FT a -- ^ FileTransaction to be performed 107 | > -> TFile -- ^ File on which transaction is performed 108 | > -> IO a 109 | > retryTransaction ft tFile = fromJust <$> runT Nothing True ft tFile 110 | 111 | At this point, before implementing anything else, we are interested in how we will actually be using them. Here I will also introduce you to the power of composing two transactions and running them as one. Let's come back to our banking example. 112 | 113 | > transfer a b x = do 114 | > deposit a (-x) 115 | > deposit b x 116 | 117 | Here is the function to withdraw amount x from account A and deposit it to account B. We have implemented very loose semantics here, not checking whether A's balance goes below 0, etc. I just wanted to show the power of composing functions. Now we can just run runTransaction on transfer to run this transaction. The semantics of runTransaction automatically takes care of all the possible failures and rolls back in case of transaction failure. 118 | 119 | Now comes the core function of our implementation, which actually performs all the actions. 120 | 121 | > runT :: Maybe Integer -- Nothing if transaction never failed else Just id 122 | > -> Bool -- True if the transaction is to be retried in case of failure 123 | > -> FT a -- Transaction 124 | > -> TFile -- The File on which to run this transaction 125 | > -> IO (Maybe a) 126 | > 127 | > runT failure retry ft tFile = do 128 | > -- Add the transaction to the transaction queue and get the current 129 | > -- fileversion. This has to be removed after commit is performed.
130 | > (tid,fileVersion) <- withSynch (FH.synchVar $ fHandle tFile) 131 | > (addToTransactionQ tFile) 132 | > let transFile = newTransactionFile tid 133 | > -- Performs the transaction on the journal file and commits. If commit 134 | > -- succeeded then return Just output else Nothing 135 | > maybeOut <- withFile transFile $ runAndCommit fileVersion tFile ft 136 | > atomicModifyIORef (transactions tFile) $ \q -> (deleteFromQueue q tid, ()) 137 | > if retry then retryIfFailed failure maybeOut fileVersion tid ft tFile 138 | > else return maybeOut 139 | > where 140 | > retryIfFailed :: Maybe Integer -> Maybe a -> FileVersion -> Integer 141 | > -> FT a -> TFile -> IO (Maybe a) 142 | > retryIfFailed Nothing out@(Just a) _ _ _ _ = return out 143 | > retryIfFailed (Just tid) out@(Just a) _ _ _ tFile = do 144 | > atomicModifyIORef (failedQueue tFile) 145 | > (\q -> (deleteFromQueue q tid,())) 146 | > return out 147 | > retryIfFailed Nothing Nothing fileVersion tid ft tFile = do 148 | > atomicModifyIORef (failedQueue tFile) 149 | > (\q -> (DQ.pushBack q $ (tid,fileVersion),())) 150 | > runT (Just tid) True ft tFile 151 | > retryIfFailed (Just tid) Nothing fileVersion _ ft tFile = 152 | > runT (Just tid) True ft tFile 153 | > 154 | > runAndCommit :: FileVersion -> TFile -> FT a -> FH.FHandle -> IO (Maybe a) 155 | > runAndCommit fv tFile ft fh = do 156 | > out <- trans ft tFile $ Transaction (BlockList [] 8 fh) ReadOnly 157 | > commit fv tFile out 158 | > 159 | > newTransactionFile :: Integer -> String 160 | > newTransactionFile tid = (show tid) ++ ".trans" 161 | > 162 | > addToTransactionQ :: TFile -> IO (Integer,FileVersion) 163 | > addToTransactionQ fh = do 164 | > fileVersion <- getFileVersion $ fHandle fh 165 | > tid <- maybe (atomicModifyIORef (FH.journalId $ fHandle fh) (\a-> (a+1,a))) return failure 166 | > atomicModifyIORef (transactions fh) 167 | > (\q -> (DQ.pushBack q $ (tid,fileVersion) , ())) 168 | > return (tid,fileVersion) 169 | > 170 | > deleteFromQueue :: DQ.BankersDequeue (Integer,a) -> Integer 171 | > -> DQ.BankersDequeue (Integer,a) 172 | > deleteFromQueue q id = do 173 | > let (a,newQ) = DQ.popFront q 174 | > case a of 175 | > Nothing -> newQ 176 | > Just e@(qid,bl) -> if qid /= id 177 | > then DQ.pushFront (deleteFromQueue newQ id) e 178 | > else newQ 179 | > 180 | > trans :: FT a -> TFile -> Transaction -> IO (a,Transaction) 181 | > trans (Done a) _ d = do 182 | > return (a,d) 183 | > trans (ReadBlock bn c) fh d = do 184 | > val <- readBlockJ fh bn d 185 | > rblcks <- addBlock (fromIntegral bn) (rBlocks d) 186 | > trans (c val) fh $ d {rBlocks = rblcks} 187 | > -- Experiment with the hashfunction and number of bits. 188 | > trans (WriteBlock bn x c) fh d = do 189 | > rw <- rToRw d (BF.emptyB (cheapHashes 20) 4096) (newJournal (fHandle fh)) 190 | > let newBl = BF.insertB bn (bloom $ tType rw) 191 | > j <- writeToJournal (journal $ tType rw) bn x 192 | > trans c fh (rw {tType = (tType rw) {bloom = newBl,journal = j}} ) 193 | 194 | All the journals of the transactions get aggregated over time which might result in poor read performances over time. So we need to actually checkpoint the commited journals back to the database file. 195 | 196 | > sequencer :: TFile -> IO () 197 | > sequencer fh = do 198 | > b <- readIORef $ jQueue fh 199 | > if DQ.null b then yield else do 200 | > let j = getJournal . 
fromJust $ DQ.first b 201 | > replayJournal j 202 | > FH.flushBuffer (fHandle fh) 203 | > popFromJournalQueue j fh 204 | > sequencer fh 205 | > where 206 | > popFromJournalQueue j fh = do 207 | > tq <- readIORef $ transactions fh 208 | > if checkToDelete (journalID j) tq then 209 | > atomicModifyIORef (jQueue fh) 210 | > (\q -> (snd $ DQ.popFront q , ())) 211 | > else do 212 | > yield 213 | > popFromJournalQueue j fh 214 | > checkToDelete jid tq = do 215 | > let (front , _) = DQ.popFront tq 216 | > case front of 217 | > Nothing -> True 218 | > Just (id,fv) -> fv >= jid 219 | > 220 | > 221 | 222 | Helpers : 223 | 224 | > commit :: FileVersion -> TFile -> (a,Transaction) -> IO (Maybe a) 225 | > commit oldFV fh (output,trans) = do 226 | > -- synch is used to prevent 2 commits from interleaving 227 | > out <- withSynch (commitSynch fh) 228 | > (do 229 | > newFV <- getFileVersion $ fHandle fh 230 | > print $ "Old fv " ++ (show oldFV) 231 | > cf <- checkSuccess oldFV newFV fh (rBlocks trans) 232 | > if not cf then return Nothing else do 233 | > case trans of 234 | > Transaction _ (ReadWrite bl jr) -> do 235 | > atomicModifyIORef (jQueue fh) 236 | > (\q -> (DQ.pushBack q $ JInfo jr bl (newFV + 1),())) 237 | > commitJournal jr 238 | > _ -> return () 239 | > return $ Just output ) 240 | > case trans of 241 | > Transaction _ t@(ReadWrite _ jl ) -> do FH.flushBuffer $ jHandle jl 242 | > FH.flushBuffer $ hHandle jl 243 | > otherwise -> return () 244 | > return out 245 | > 246 | > withSynch :: MVar () -> IO b -> IO b 247 | > withSynch synchVar = bracket_ (takeMVar synchVar) (putMVar synchVar ()) 248 | > withFile :: FilePath -> (FH.FHandle -> IO a) -> IO a 249 | > -- Blocksize taken to be 4096 Bytes 250 | > withFile fp = bracket (FH.openF fp ReadWriteMode 4096) FH.closeF 251 | > 252 | > rToRw :: Transaction -> BF.Bloom BlockNumber -> IO Journal -> IO Transaction 253 | > rToRw t@(Transaction rb ReadOnly) bl jl = do 254 | > j <- jl 255 | > return $ Transaction rb (ReadWrite bl j) 256 | > rToRw t _ _ = return t 257 | > tMap :: Transaction -> (BF.Bloom BlockNumber -> BF.Bloom BlockNumber) 258 | > -> (Journal -> Journal) -> Transaction 259 | > tMap t@(Transaction rb ReadOnly) _ _ = t 260 | > tMap (Transaction rb (ReadWrite bl jl)) fbl fjl = 261 | > Transaction rb (ReadWrite (fbl bl) (fjl jl)) 262 | -------------------------------------------------------------------------------- /refs/disk_based_transactions.md: -------------------------------------------------------------------------------- 1 | % Concurrent Disk Based Transactions in Haskell 2 | % Satvik Chauhan and Pankaj More 3 | % April 16 , 2012 4 | 5 | # Shared Resource Problem 6 | 7 | 13 | * **Problem** : Share a resource between multiple concurrent threads. 14 | * Results in several problems. 15 | * Lost update 16 |
 17 | +----------P1-------------+-----+------------P2------------+
 18 | |                         +  1  + w <- read(A)             |
 19 | | u <- read(A)            +  2  +                          |
 20 | | write(A,u+100)          +  3  +                          |
 21 | |                         +  4  + write(A,w+100)           |
 22 | +-------------------------+-----+--------------------------+
 23 | 
24 | * Incorrect summary problem 25 |
 26 | +----------P1-------------+-----+------------P2------------+
 27 | |                         +  1  + w <- read(A)             |
 28 | |                         +  2  + write(A,w-100)           |
 29 | | u1 <- read(A)           +  3  +                          |
 30 | | u2 <- read(B)           +  4  +                          |
 31 | | sum = u1 + u2           +  5  +                          |
 32 | |                         +  6  + x <- read(B)             |
 33 | |                         +  7  + write(B,x+100)           |
 34 | +-------------------------+-----+--------------------------+
 35 | 
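* A minimal, self-contained sketch of the lost update above (illustrative only, not part of the library): two unsynchronised read-modify-write sequences on a shared `IORef` can drop a deposit.

~~~ {.haskell}
import Control.Concurrent
import Data.IORef

main :: IO ()
main = do
    acc  <- newIORef (0 :: Int)
    done <- newEmptyMVar
    let depositRacy = do
            u <- readIORef acc        -- u <- read(A)
            threadDelay 1000          -- widen the race window
            writeIORef acc (u + 100)  -- write(A,u+100)
    _ <- forkIO (depositRacy >> putMVar done ())
    depositRacy
    takeMVar done
    readIORef acc >>= print           -- may print 100 instead of 200
~~~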
36 | 37 | # Shared Resource Problem (Cont) 38 | 39 | * **Solution** : Locks !! 40 | * Problem with Locks 41 | * Race Conditions if locks are forgotten. 42 | * Deadlocks from inconsistent lock ordering. 43 | * Uncaught exceptions might result in any of the above problems. 44 | * Coarse Locks hurt performance. 45 | * Locks don't compose. 46 | 47 | # Shared Resource Problem (Cont) 48 | 49 | * Two phase locking 50 | * Any transaction first must acquire locks for all the shared resources. 51 | * Perform the operations on the shared resources. 52 | * Release all the locks. 53 | * Still deadlock problem. 54 |
 55 | +----------P1-------------+-----+------------P2------------+
 56 | |                         +  1  + Acquire(A)               |
 57 | | Acquire(B)              +  2  +                          |
 58 | |                         +  3  + Acquire(B)               |
 59 | | Acquire(A)              +  4  +                          |
 60 | | Some operations         +  5  +                          |
 61 | | Release(B)              +  6  +                          |
 62 | | Release(A)              +  7  +                          |
 63 | |                         +  8  + Some operations          |
 64 | |                         +  9  + Release(A)               |
 65 | |                         +  10 + Release(B)               |
 66 | +-------------------------+-----+--------------------------+
 67 | 
68 | 69 | * Can prevent deadlocks by acquiring locks in a certain fixed order. 70 | * Still hurts performance. 71 | 72 | 73 | # Transactions 74 | 75 | * An optimistic way of managing shared resources. 76 | * A set of operations which are performed on a resource either fully or not at all. 77 | * Helps to maintain a consistent view of the resource. 78 |
 79 | +----------T1-------------+-----+------------T2------------+
 80 | |                         +  1  + w <- read(A)             |
 81 | | u1 <- read(A)           +  2  +                          |
 82 | | write(A,u1+100)         +  3  +                          |
 83 | | commit                  +  4  +                          |
 84 | |                         +  5  + write(A,w-100)           |
 85 | |                         +  6  + x <- read(B)             |
 86 | |                         +  7  + write(B,x+100)           |
 87 | |                         +  8  + commit Fails             |
 88 | |                         +  9  + rollback                 |
 89 | +-------------------------+-----+--------------------------+
 90 | 
91 | * When T2 is rolled back, the memory state is the same as if T2 had never executed. 92 | 93 | # Transactions (Cont) 94 | 95 | * **Atomicity** 96 | * Transaction either commits or fails 97 | * **Consistency** 98 | * Resource is in a consistent state after a transaction commits. 99 | * **Isolation** 100 | * Unaware of the effects of concurrently running transactions. 101 | * **Durability** 102 | * Once a transaction commits, changes must persist. 103 | 104 | # Software Transactional Memory (STM) 105 | 106 | * A new way of programming on multicore systems. 107 | * An optimistic way of running transactions. 108 | * Allows all transactions to run simultaneously. 109 | * Transactions perform changes to their own local buffer. 110 | * At the time of commit, decide success or failure. 111 | * Success : The changes become simultaneously visible to other threads. 112 | * Failure : No changes made to the memory. 113 | * Compose well. 114 | 115 | # Disk Based Transactions 116 | 117 | * Schedule 118 | * A sequence of instructions that specify the chronological 119 | order in which instructions of concurrent transactions are executed 120 | * A schedule for a set of transactions must consist of all instructions 121 | of those transactions 122 | * A schedule must preserve the order in which the instructions appear in each 123 | individual transaction. 124 | 125 | # Serial Schedule 126 | 127 | * A serial schedule in which T1 is followed by T2 128 |
129 | +----------T1-------------+-----+------------T2------------+
130 | | read(A)                 +  1  +                          |
131 | | A <- A * 2              +  2  +                          |
132 | | write(A)                +  3  +                          |
133 | |                         +  4  + read(A)                  |
134 | |                         +  5  + A <- A - 50              |
135 | |                         +  6  + write(A)                 |
136 | |                         +  7  + read(B)                  |
137 | |                         +  8  + B <- B + 50              |
138 | |                         +  9  + write(B)                 |
139 | +-------------------------+-----+--------------------------+
140 | 
141 | * A serial schedule in which T2 is followed by T1 142 |
143 | +----------T1-------------+-----+------------T2------------+
144 | |                         +  1  + read(A)                  |
145 | |                         +  2  + A <- A - 50              |
146 | |                         +  3  + write(A)                 |
147 | |                         +  4  + read(B)                  |
148 | |                         +  5  + B <- B + 50              |
149 | |                         +  6  + write(B)                 |
150 | | read(A)                 +  7  +                          |
151 | | A <- A * 2              +  8  +                          |
152 | | write(A)                +  9  +                          |
153 | +-------------------------+-----+--------------------------+
154 | 
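* As a quick sketch (plain `IORef`s, not the transaction library), both serial orders above are valid executions; they merely produce different final states.

~~~ {.haskell}
import Control.Applicative ((<$>), (<*>))
import Data.IORef

t1, t2 :: IORef Int -> IORef Int -> IO ()
t1 a _ = modifyIORef a (* 2)              -- A <- A * 2
t2 a b = modifyIORef a (subtract 50)      -- A <- A - 50
      >> modifyIORef b (+ 50)             -- B <- B + 50

main :: IO ()
main = do
    (a, b) <- (,) <$> newIORef 100 <*> newIORef 100
    t1 a b >> t2 a b                      -- T1 followed by T2
    (,) <$> readIORef a <*> readIORef b >>= print
    -- prints (150,150); T2 followed by T1 would give (100,150)
~~~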
155 | 156 | # Why interleave 157 | 158 | * In a serial schedule, if a transaction waits for an I/O operation to complete, 159 | CPU cycles are wasted 160 | * Other transactions may also be in line waiting for the completion of a 161 | transaction. (Hint: Convoy effect) 162 | * For these reasons, serial schedules are generally considered unacceptable in 163 | practice. 164 | * Interleaving could 165 | * utilise idle CPU cycles 166 | * improve "perceived performance" 167 | 168 | 169 | 170 | # Conflicting Instructions 171 | 172 | * Instructions Ii and Ij of transactions Ti and Tj respectively, conflict 173 | if and only if there exists some item Q accessed by both Ii and Ij, 174 | and at least one of these instructions wrote Q. 175 | * Intuitively, a conflict between Ii and Ij forces a (logical) temporal order between them. 176 | * If Ii and Ij are consecutive in a schedule and they do not conflict, their results would remain the same even if they had been interchanged in the schedule. 177 | 178 | # Serializability 179 | 180 | * If a schedule S can be transformed into a schedule S´ by a series of 181 | swaps of non-conflicting instructions, we say that S and S´ are 182 | equivalent 183 | * We say that a schedule S is serializable if it is equivalent to a serial schedule 184 | * Serializability provides us with *formalism* for valid interleaving 185 | * All possible equivalent schedules are candidates for valid concurrent execution 186 | 187 | # A Simplified View of Transactions 188 | 189 | * We ignore operations other than read and write instructions 190 | * We assume that transactions may perform arbitrary computations on data in local buffers in between reads 191 | and writes 192 | * Our simplified schedules consist of only read and write instructions 193 | 194 | # Lock Based Implementations 195 | 196 | * Global lock on the database file 197 | * Multiple readers can *share* locks in the absence of a writer 198 | * Writer requires *exclusive* locks 199 | * Good for read-heavy applications 200 | * Inefficient in case of multiple writers 201 | 202 | 203 | # Why Haskell? 204 | 205 | * Haskell's expressive power can improve productivity 206 | * Small language core provides big flexibility 207 | * Code can be very concise, speeding development 208 | * Get the best of both worlds from compiled and interpreted languages 209 | 210 | * Haskell makes code easier to understand and maintain 211 | * Can dive into complex libraries and understand *what* the code 212 | is doing 213 | 214 | * Haskell can increase the robustness of systems 215 | * Strong typing catches many bugs at compile time 216 | * Functional code permits better testing methodologies 217 | * Can parallelize non-concurrent code without changing semantics 218 | * Concurrent programming abstractions resistant to data races 219 | 220 | * Lots of compiler optimizations 221 | * Fusion, inlining, etc. 222 | 223 | # Haskell is a *pure* functional language 224 | 225 | * Unlike variables in imperative languages, Haskell bindings are 226 | * *immutable* - can only bind a symbol once in a given scope
227 | (We still call bound symbols "variables" though)
228 | 229 | ~~~ {.haskell} 230 | x = 5 231 | x = 6 -- error, cannot re-bind x 232 | ~~~ 233 | 234 | * *order-independent* - order of bindings in source code does not 235 | matter 236 | 237 | * *lazy* - definitions of symbols are evaluated only when needed 238 | 239 | ~~~ {.haskell} 240 | evens = map (*2) [1..] -- infinite list of even numbers 241 | main = print (take 50 evens) -- prints first 50 even numbers 242 | ~~~ 243 | 244 | * *recursive* 245 | 246 | # Haskell , Transactions and STM 247 | 248 | * Transactions Using Locks 249 | 250 | ~~~ {.haskell} 251 | transfer :: Account -> Account -> Int -> IO () 252 | transfer to from amount = do 253 | acquire fromLock 254 | acquire toLock 255 | withdraw from amount 256 | deposit to amount 257 | release fromLock 258 | release toLock 259 | ~~~ 260 | 261 | * Easy to result into deadlocks. 262 | 263 | # Haskell , Transactions and STM (Cont) 264 | 265 | * STM 266 | 267 | ~~~ {.haskell} 268 | withdraw :: Account -> Int -> STM () 269 | withdraw acc amount = do 270 | bal <- readTVar acc 271 | writeTVar acc (bal - amount) 272 | 273 | deposit :: Account -> Int -> STM () 274 | deposit acc amount = withdraw acc (-amount) 275 | 276 | transfer :: Account -> Account -> Int -> IO () 277 | transfer to from amount = atomically (do 278 | withdraw from amount 279 | deposit to amount) 280 | ~~~ 281 | 282 | * No deadlocks 283 | * Easy to compose 284 | * Type system prevents any IO operations to be performed inside a transaction. 285 | 286 | # Disk Based Transactions 287 | 288 | * Assumptions 289 | * Fsync syscall works as advertised 290 | * The data it reads is *exactly* the same that it previously wrote 291 | * Writing to disk adheres to block boundaries 292 | 293 | # Algorithm 294 | 295 | * A single transaction file on which all transactions are done 296 | * A transaction can be either ReadOnly or ReadWrite 297 | * Each read-write transaction has its own *log* 298 | * read-only transactions don't need any *logs* 299 | * A transaction first writes to its own *log* 300 | * Committing a transaction increments the file version of the transaction file 301 | * Reads from the latest valid *log* 302 | 303 | # Commit workflow 304 | 305 | ![](1.png) 306 | 307 | # Commit workflow (Cont) 308 | 309 | ![](2.png) 310 | 311 | # Commit workflow (Cont) 312 | 313 | ![](3.png) 314 | 315 | # Sequencer 316 | 317 | * Number of *logs* can grow very fast 318 | * Checkpoint performs the following operations 319 | * Merges all the data from *logs* onto the transaction file 320 | * Removes the *log* reference from the queue 321 | * Deletes the corresponding *log* file from disk 322 | * Sequencer can be called by the programmer at suitable time 323 | 324 | # ACIDity 325 | * **Atomicity** 326 | * Transaction either commits or fails 327 | * **Consistency** 328 | * The set of *valid logs* + transaction file is always consistent 329 | * **Isolation** 330 | * A transaction cannot see the effects of other running transactions 331 | * **Durability** 332 | * Once a transaction commits , fsync must ensure that the data is actually written onto the disk. 
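* A miniature model of the commit rule from the Algorithm slides (the names `Version`, `writesSince` and `tryCommit` are illustrative, not the library API): a transaction may commit only if no block it read was overwritten after the version it started at.

~~~ {.haskell}
import Data.List (intersect)

type Version = Integer
type Block   = Int

-- Blocks written by each version committed after startV.
writesSince :: Version -> [(Version, [Block])] -> [Block]
writesSince startV history =
    concat [ ws | (v, ws) <- history, v > startV ]

-- Commit succeeds only if the read set is disjoint from the
-- blocks written since the transaction began.
tryCommit :: Version -> [Block] -> [(Version, [Block])] -> Bool
tryCommit startV readSet history =
    null (readSet `intersect` writesSince startV history)

main :: IO ()
main = do
    let history = [(1, [5]), (2, [9])]
    print (tryCommit 0 [5] history)  -- False: block 5 changed after v0
    print (tryCommit 1 [5] history)  -- True: only block 9 changed after v1
~~~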
333 | 334 | # Performance Trade-off 335 | 336 | * There is a trade-off between average read performance and average write performance 337 | * Keep minimum number of *logs* for maximum read performance 338 | * Checkpoint as frequently as possible 339 | * For maximum write performance , delay checkpoints as far as possible 340 | * The optimum checkpoint frequency depends on the particular performance requirements of the application 341 | 342 | 343 | # Implementation Details 344 | 345 | * We have captured transaction in a monad. 346 | 347 | ~~~ {.haskell} 348 | -- Transactions . lhs 349 | -- | Transaction Datatype 350 | data FT a = 351 | Done a | 352 | ReadBlock BlockNumber ( ByteString -> FT a ) | 353 | WriteBlock BlockNumber ByteString ( FT a ) 354 | ~~~ 355 | 356 | * Allow 2 operations. 357 | * Read data from a block. 358 | * Write data to a block. 359 | 360 | * Monad Instance 361 | 362 | ~~~ {.haskell} 363 | -- Transactions . lhs 364 | -- | Monad Definition for the Transaction . 365 | instance Monad FT where 366 | return = Done 367 | m >>= f = case m of 368 | Done a -> f a 369 | ReadBlock bn c -> ReadBlock bn (\ i -> c i >>= f ) 370 | WriteBlock bn x c -> WriteBlock bn x ( c >>= f ) 371 | ~~~ 372 | 373 | 374 | # Implementation Details (Cont) 375 | 376 | * Example Usage: 377 | 378 | ~~~ {.haskell} 379 | deposit :: BlockNumber -> ByteString -> FT () 380 | deposit a x = do 381 | amount <- ReadBlock a return 382 | WriteBlock a (amount + x) (return ()) 383 | ~~~ 384 | 385 | * Explicit notation in terms of bind and return 386 | 387 | ~~~ {.haskell} 388 | ReadBlock a return >>= (\amount -> WriteBlock a (amount +x) (return())) 389 | ReadBlock a (\i -> return i >>= \amount -> WriteBlock a (amount + x) (return())) 390 | ReadBlock a (\i -> Done i >>= \amount -> WriteBlock a (amount + x) (return())) 391 | ReadBlock a (\i -> WriteBlock a (i + x) (return())) 392 | ~~~ 393 | 394 | # Implementation Details (Cont) 395 | 396 | * Defining monad instance has 2 advantages 397 | * Type Checker prevents the user from performing any other IO operations. 398 | 399 | ~~~ {.haskell} 400 | deposit :: BlockNumber -> ByteString -> FT () 401 | deposit a x = do 402 | amount <- ReadBlock a return 403 | print amount -- Not allowed by type checker 404 | WriteBlock a (amount + x) (return ()) 405 | ~~~ 406 | 407 | * Easy composition 408 | 409 | ~~~ {.haskell} 410 | transfer :: BlockNumber -> BlockNumber -> ByteString -> FT () 411 | transfer a b x = deposit a (-x) >> deposit b x 412 | ~~~ 413 | 414 | ~~~ {.haskell} 415 | transfer :: BlockNumber -> BlockNumber -> ByteString -> FT () 416 | transfer a b x = do 417 | deposit a (-x) 418 | deposit b x 419 | ~~~ 420 | 421 | # Implementation Details (Cont) 422 | 423 | * Running a transaction 424 | 425 | ~~~ {.haskell} 426 | runTransaction :: FT a -> TFile -> IO a 427 | runTransaction (transfer a b 100) 428 | retryTransaction :: FT a -> TFile -> IO a 429 | retryTransaction (transfer a b 100) 430 | ~~~ 431 | 432 | * runTransaction ensures all the properties of a transaction. 433 | * Transaction may fail while using runTransaction. 434 | * retryTransaction ensures that transaction is repeated until it succeeds. 435 | * Too many Journals might result in poor read performance. 436 | * Sequencer copies the data to the main file and remove the intermediate journal files. 437 | 438 | ~~~ {.haskell} 439 | sequencer :: TFile -> IO () 440 | ~~~ 441 | 442 | # Testing 443 | 444 | * Ensuring robustness and quality of the implementation. 445 | * QuickCheck : Tool for type based testing. 
446 | * Invariants checked on random testcases generated from the function type. 447 | 448 | ~~~ {.haskell} 449 | qsort :: Ord a => [a] -> [a] 450 | qsort (x:xs) = qsort lhs ++ [x] ++ qsort rhs 451 | where lhs = filter (< x) xs 452 | rhs = filter (>= x) xs 453 | -- Quickcheck property to check 454 | prop_sortcheck xs = qsort (qsort xs) == qsort xs 455 | ~~~ 456 | 457 | ~~~ {.haskell} 458 | ghci> quickCheck (prop_sortcheck :: [Integer] -> Bool) 459 | *** Failed! Exception: 'qsort.hs:(3,1)-(5,32): Non-exhaustive patterns in function qsort' (after 1 test): 460 | [] 461 | ~~~ 462 | 463 | * Correcting the code 464 | 465 | ~~~ {.haskell} 466 | qsort [] = [] 467 | ~~~ 468 | 469 | ~~~ {.haskell} 470 | ghci> quickCheck (prop_sortcheck :: [Integer] -> Bool) 471 | +++ OK, passed 100 tests. 472 | ~~~ 473 | 474 | # Testing (Cont) 475 | 476 | * Keeping things pure. 477 | * Easier to reason about code with no side effects. 478 | * Simulate File system without actually performing IO for testing purposes. 479 | * Different types for actual code and testing code. 480 | * Type Classes. 481 | 482 | ~~~ {.haskell} 483 | -- Backend.hs 484 | class (Show a,Monad m) => Backend b m a | b -> m where 485 | data Handle :: * -> * 486 | type BlockNumber :: * 487 | open :: FilePath -> Mode -> m (Handle b) 488 | close :: Handle b -> m () 489 | readBlock ::(Eq BlockNumber)=>Handle b->BlockNumber->m a 490 | writeBlock ::(Eq BlockNumber)=>Handle b->BlockNumber->a->m () 491 | sync :: Handle b -> m () 492 | ~~~ 493 | 494 | # Testing (Cont) 495 | 496 | * File System being simulated in memory. 497 | 498 | ~~~ {.haskell} 499 | -- Backend.hs 500 | data TestDisk a = TestDisk { 501 | disk :: M.Map FilePath (File a) 502 | , buffers :: M.Map FilePath (File a) 503 | , bufferSize :: Int -- Current buffer Size 504 | , openFiles :: M.Map FilePath Mode 505 | } deriving (Eq,Show) 506 | data File a = File { 507 | blocks :: M.Map Int a 508 | , size :: Int 509 | } deriving (Eq, Show) 510 | 511 | data Mode = ReadWrite | Read | Write deriving (Eq,Show) 512 | ~~~ 513 | 514 | 515 | # Testing (Cont) 516 | 517 | * Instance of the Backend Class . Pure and no side effects. 
518 | 519 | ~~~ {.haskell} 520 | instance Show a => Backend (TestDisk a) (State (TestDisk a)) a where 521 | data Handle (TestDisk a) = Handle FilePath 522 | type BlockNumber = Int 523 | open fp m = do 524 | disk <- get 525 | put $ openFile disk fp m 526 | return $ Handle fp 527 | close (Handle fp) = do 528 | disk <- get 529 | put $ closeFile disk fp 530 | readBlock (Handle fp) bn = do 531 | t <- get 532 | case readB t fp bn buffers of 533 | Nothing -> error "Block Not Found" 534 | Just a -> return a 535 | writeBlock (Handle fp) bn a = do 536 | t <- get 537 | put $ writeB t fp bn a 538 | sync (Handle fp) = do 539 | t <- get 540 | put $ flushBuffer t fp 541 | ~~~ 542 | 543 | 544 | 545 | 546 | 547 | -------------------------------------------------------------------------------- /refs/report.tex: -------------------------------------------------------------------------------- 1 | \documentclass[11pt,a4paper]{article} 2 | \usepackage{graphicx} 3 | \usepackage{alltt} 4 | \usepackage{color} 5 | \usepackage{fancyhdr} 6 | \usepackage{listings} 7 | \usepackage{multicol} 8 | \usepackage{hyperref} 9 | \pagestyle{fancy} 10 | \definecolor{gray_ulisses}{gray}{0.55} 11 | \definecolor{lightgray}{gray}{0.95} 12 | \definecolor{castanho_ulisses}{rgb}{0.71,0.33,0.14} 13 | \definecolor{preto_ulisses}{rgb}{0.41,0.20,0.04} 14 | \definecolor{green_ulises}{rgb}{0.2,0.75,0} 15 | \lstdefinelanguage{HaskellU} 16 | { 17 | basicstyle=\ttfamily\scriptsize, 18 | backgroundcolor=\color{lightgray}, 19 | %frameshape={RYRYNYYYY}{yny}{yny}{RYRYNYYYY}, %contornos... muito nice... 20 | sensitive=true, 21 | morecomment=[l][\color{gray_ulisses}\scriptsize]{--}, 22 | morecomment=[s][\color{gray_ulisses}\scriptsize]{\{-}{-\}}, 23 | morestring=[b]", 24 | stringstyle=\color{red}, 25 | showstringspaces=false, 26 | numbers=left, 27 | firstnumber=last, 28 | numberstyle=\tiny, 29 | numberblanklines=true, 30 | showspaces=false, 31 | showtabs=false, 32 | xleftmargin=10pt, 33 | xrightmargin=-5pt, 34 | emph= 35 | {[2] return,fromJust,readBlockT,writeBlockT,runTransaction,retryTransaction,sequencer,runT,transfer,deposit 36 | ,sync,open,close,readBlock,writeBlock,get,put,closeFile,openFile,readB,writeB,flushBuffer }, 37 | emphstyle={[2]\color{blue}}, 38 | emph= 39 | {[1] BS,ByteString, TFile,BlockNumber,FT,Maybe,IO,FilePath,Eq,Show,Monad,Handle,Mode,State,TestDisk,Backend,Int 40 | }, 41 | emphstyle={[1]\color{castanho_ulisses}}, 42 | emph= 43 | {[3] 44 | case,class,data,deriving,do,type,else,if,import,in,infixl,infixr,instance,let, 45 | module,of,primitive,then,type,where,ReadBlock,WriteBlock,Done,Nothing,Just,True,False 46 | }, 47 | emphstyle={[3]\color{preto_ulisses}\textbf}, 48 | emph= 49 | {[4] 50 | }, 51 | emphstyle={[4]\color{green_ulises}\textbf} 52 | } 53 | 54 | \lstnewenvironment{code}[1][] 55 | {\lstset{language=HaskellU,#1}} 56 | {} 57 | 58 | \title{ Concurrent Disk-Based Transactions in Haskell} 59 | \author{Satvik Chauhan \and Pankaj More} 60 | \date{{\small \today}} 61 | % 62 | \begin{document} 63 | \maketitle 64 | % 65 | \begin{abstract} 66 | Transactions form an integral part of any multi-user concurrent database system. Most databases implement transactions by various locking mechanism. 67 | In this project, we present a lock free composable file transaction system implemented in a high level functional programming language, Haskell.Our library allows easy composition of transactions and running them as a single atomic transaction thus providing a powerful abstraction in the programmer's tool-kit. 
The implementation is not only highly concurrent but also non blocking as there are no locks on files. We have exploited the space efficient probabilistic data structure called Bloom Filters to keep primary memory requirements low. 68 | 69 | 70 | \emph{Keywords:} Concurrency, Transactions, Databases, Haskell 71 | 72 | \end{abstract} 73 | 74 | % 75 | \section{Introduction} 76 | 77 | The major problem in a concurrent system with shared resource is how to share the same resource between all the processes. The traditional way of keeping data consistent is with locks, and we notify threads of changes using condition variables. The example of such mechanism is Haskell's MVar. The drawbacks of using such mechanism is : 78 | \begin{itemize} 79 | \item Race conditions due to forgotten locks 80 | \item Deadlocks resulting from inconsistent lock ordering 81 | \item Uncaught exceptions resulting in corruption 82 | \end{itemize} 83 | 84 | A transaction is an optimistic way of achieving the same thing without hurting the concurrent part as it allows all the operations to run concurrently while providing mechanisms to roll-back the resource to a consistent state in case of any failure. 85 | 86 | Formally, a transaction is a group of operations combined into a logical unit of work. Developer uses transactions to control and maintain the consistency and integrity of each action in a transaction, despite errors that might occur in the system either due to concurrency or hardware failure. In database context a transaction on a database is considered to be set of operations performed such that database is in a consistent state before and after the transaction and in case of failure the system must provide ways to roll-back partial transactions to bring the database back into a consistent state. Transaction in the database environment mainly serve two purposes: 87 | 88 | \begin{enumerate} 89 | \item To provide reliable units of work that allow correct recovery from failures and keep a database consistent even in cases of system failure, when execution stops (completely or partially) and many operations upon a database remain uncompleted, with unclear status. 90 | \item To provide isolation between programs accessing a database concurrently. If this isolation is not provided the programs outcome are possibly erroneous. 91 | \end{enumerate} 92 | 93 | Transactions provide an "all-or-nothing" proposition, stating that each work-unit performed in a database must either complete in its entirety or have no effect whatsoever. Further, the system must isolate each transaction from other transactions, results must conform to existing constraints in the database, and transactions that complete successfully must get written to durable storage. Thus, even in case of hardware failure a transaction once commited must persist. A transaction is expected satisfy some guarantees which are often called ACID guarantees: 94 | 95 | \begin{itemize} 96 | \item \emph{Atomicity} means a transaction can end only in two ways: either successfully, in which case all its effects are written to a durable storage and persist between power failures, or unsuccessfully, in which case it has no effect, it can be assumed that this transaction never happened. 97 | \item \emph{Consistency} just means that a transaction is written correctly, so that when it completes successfully, the database is in a consistent state. 
98 | \item \emph{Isolation} means that the transaction appears to execute completely alone, even if, in fact, other transactions are running simultaneously. In other words, transaction always sees a consistent snapshot of the database and is totally unaware of the changes made by other transactions which are running concurrently with it. 99 | \item \emph{Durability} means that a successful transaction's changes are permanent and persist in all possible sorts of failures. In practice this means to be written on disk. 100 | \end{itemize} 101 | 102 | Software Transactional Memory (STM) is a new way of programming in a shared memory parallel processors. STM gives us a few simple, but powerful, tools with which we can address most of these problems. We execute a block of actions as a transaction using the atomically combinator. Once we enter the block, other threads cannot see any modifications we make until we exit, nor can our thread see any changes made by other threads. These two properties mean that our execution is isolated.Upon exit from a transaction, exactly one of the following things will occur. 103 | \begin{itemize} 104 | \item If no other thread concurrently modified the same data as us, all of our modifications will simultaneously become visible to other threads. 105 | \item Otherwise, our modifications are discarded without being performed. 106 | \end{itemize} 107 | 108 | Concurrency in GHC (Glasgow Haskell Compiler) is "lightweight", which means that both thread creation and context switching overheads are extremely low. We can create thousands of threads simultaneously without any problem. Scheduling of Haskell threads is done internally in the Haskell runtime system, and doesn't make use of any operating system-supplied thread packages. 109 | 110 | In this library we have tried to imitate and provide similar high-level interface to our transactions library as is provided in case of STM. We will see the example of our interface later. 111 | 112 | 113 | 114 | \section{Lock Based Implementations} 115 | Most of the existing implementations use locks on files for concurrency control. A simplified model of database file locking is given below. 116 | 117 | From the point of view of a single process, a database file can be in one of five locking states: 118 | \begin{itemize} 119 | \item \emph{Unlocked:} No locks are held on the database. The database may be neither read nor written. Any internally cached data is considered suspect and subject to verification against the database file before being used. Other processes can read or write the database as their own locking states permit. This is the default state. 120 | \item \emph{Shared:} The database may be read but not written. Any number of processes can hold shared locks at the same time, hence there can be many simultaneous readers. But no other thread or process is allowed to write to the database file while one or more shared locks are active. 121 | \item \emph{Reserved:} A reserved lock means that the process is planning on writing to the database file at some point in the future but that it is currently just reading from the file. Only a single reserved lock may be active at one time, though multiple shared locks can coexist with a single reserved lock. Reserved differs from pending in that new shared locks can be acquired while there is a reserved lock. 
122 | \item \emph{Pending:} A pending lock means that the process holding the lock wants to write to the database as soon as possible and is just waiting on all current shared locks to clear so that it can get an exclusive lock. No new shared locks are permitted against the database if a pending lock is active, though existing shared locks are allowed to continue. 123 | \item \emph{Exclusive:} An exclusive lock is needed in order to write to the database file. Only one exclusive lock is allowed on the file and no other locks of any kind are allowed to coexist with an exclusive lock. 124 | \end{itemize} 125 | 126 | An example of database engine which follows such model is SQlite. 127 | Multiple readers can read from the database file. 128 | Only a single writer can write to the database at any given time and all the reader have to wait till the write is complete. 129 | Hence such a model is good for heavy read-based applications but inefficient when multiple write transactions try to modify the database concurrently. 130 | 131 | \pagebreak 132 | 133 | \section{Algorithm} 134 | 135 | \subsection{Simplified version} 136 | 137 | We ignore operations other than read and write instructions. We assume that transactions may perform arbitrary computations on data in local buffers in between reads and writes. Our simplified schedules consist of only read and write instructions. 138 | 139 | Any transaction has access to following three operations : readblock, writeblock and checkpoint. 140 | 141 | 142 | 143 | \subsection{Hardware Assumptions} FT does not assume that sector writes are atomic . Even though newer disk storage systems guarantee sector writes as atomic using miniature battery based power supply just after a power failure , FT follows a pessimistic approach and assumes that no such facility might be provided by the underlying hardware. 144 | 145 | FT assumes that the operating system will buffer writes and that a write request will return before data has actually been stored onto the disk. FT assumes that the flush or fsync will not return until all pending write operations for the file that is being flushed have completed. This assumption might not hold in actual practice. It is possible that the flush and fsync system calls are broken on windows and linux. This is unfortunate as it might lead to data inconsistency during a power failure. However, there is nothing that FT can do to test for or remedy the above situation. The possibility of such scenarios can be reduced by guaranteeing good uptime quality of the hardware. 146 | 147 | FT assumes that the detection and/or correction of bit errors caused by cosmic rays, thermal noise, quantum fluctuations, device driver bugs, or other mechanisms, is the responsibility of the underlying hardware and operating system. FT does not add any redundancy to the database file for the purpose of detecting corruption or I/O errors. FT assumes that the data it reads is exactly the same data that it previously wrote. 148 | 149 | By default, FT assumes that an operating system call to write a range of bytes will not damage or alter any bytes outside of that range even if a power lose or OS crash occurs during that write. 150 | 151 | \subsection{Commit Workflow} 152 | For maintaining the transaction information and efficient execution of concurrent transactions , we utilize concepts like write ahead logging (WAL) in SQlite, bloomfilters, serializability, etc. 
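As a small illustration of the journal membership test described next (a sketch, not the library code; \texttt{mightContain} is a hypothetical helper), such queries can be written against the immutable \texttt{Data.BloomFilter} API that Transactions.lhs already uses:

\begin{code}
-- Sketch only: mightContain is a hypothetical helper, not library code.
import qualified Data.BloomFilter as BF
import Data.BloomFilter.Hash (cheapHashes)

type BlockNumber = Int

-- True means "maybe present, confirm with actual journal I/O";
-- False is definitive: the journal cannot contain the block.
mightContain :: BF.Bloom BlockNumber -> BlockNumber -> Bool
mightContain bl bn = BF.elemB bn bl

demo :: Bool
demo = mightContain (BF.insertB 42 (BF.emptyB (cheapHashes 20) 4096)) 42
\end{code}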
153 | 154 | We use bloomfilters to find out rapidly and memory-efficiently, whether a block is present in a journal or not. 155 | A Bloom filter is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set. False positives are possible, but false negatives are not; i.e. a query returns either "inside set (may be wrong)" or "definitely not in set". 156 | Every journal has a corresponding bloomfilter. Bloomfilters instead of indexes are kept in memory to keep memory requirements low and provide good read performance at the same time. 157 | Bloomfilter keeps track of the list of blocks in a journal file. 158 | 159 | We store a global queue of committed(valid) journals. 160 | 161 | If a transaction is purely read-based , it need not keep any journal. 162 | If the transaction is in ReadWrite Mode , it will have its own journal. 163 | 164 | ReadBlock first tries reading from its own journal if it exists. Then it tries to read from the queue of commited journals. It doesnt actually need to perform I/O on every journal to find the block. It checks the bloomfilter associated with each journal and if bloomfilter reports true , it actually performs I/O to find out if it is true. As a side note, it is obvious that the goal is to minimize the false rate positive of bloomfilter so that such costly disk accesses can be minimized. Proper real time benchmarking and fine-tuning of bloom filter parameters can reduce false-positive rate and optimize the performance. If it really finds the block , then it fetches the data from the disk and returns it. If the bloomfilter had signalled a false-positive , it needs to continue its search further down the queue recursively. If the queue is fully traversed and the block is not found , it finally reads from the main database file. 165 | 166 | WriteBlock simply writes the block data to its own journal file. It might latter fail to commit if there are conflicts. This is discussed below. 167 | 168 | When a transaction needs to commit , it first checks whether there is a conflict between the blocks that it read and the blocks changed between the file version when it started and the current file version. If a conflict exists , the transaction must abort. If there are no such conflicts and it was a read only transaction then it commits successfully. Whereas, in case of a read-write transaction , it adds its journal to the journal queue with the incremented file version. It also increments the file version of the main database file. It flushes the required buffers so that the data gets actually written to the disk to guarantee durability. 169 | 170 | 171 | 172 | 173 | \subsection{Acid Guarantees} 174 | \begin{itemize} 175 | \item Atomicity: 176 | Transaction either commits or aborts. 177 | \item Consistency: 178 | The set of valid journals and transaction file is always consistent 179 | \item Isolation: 180 | Since the journal of a concurrently running transaction is not yet valid(as it is not yet committed) , the journal must not be in the global journal queue. Hence , a concurrent transaction can never see the change made by another running transaction. 181 | \item Durability: 182 | Once a transaction commits , fsync must ensure that the data is actually written onto the disk. Hence assuming our calls to fsync are always called at the appropriate time , the burden of durability rests on the fsync system call correctness in the host operating system. 
183 | \end{itemize} 184 | 185 | \subsection{Sequencer} 186 | If there are large number of transactions executing concurrently , the number of journals can grow large extremely fast. There is a need to keep the number of journals in control else it would lead to extremely bad read performance. A checkpoint operation transfers all the data in journal files to the main database file. Hence , frequent checkpointing would reduce the overhead of read operations. 187 | 188 | \subsection{Read-write Trade-off} 189 | There is a trade-off between average read performance and average write performance. To maximize the read performance, one wants to keep the number of journal files as small as possible and hence run checkpoints frequently. To maximize write performance, one wants to amortize the cost of each checkpoint over as many writes as possible, meaning that one wants to run checkpoints infrequently and let the no of journal files grow as large as possible before each checkpoint. The decision of how often to run checkpoints may therefore vary from one application to another depending on the relative read and write performance requirements of the application. 190 | 191 | 192 | 193 | \pagebreak 194 | \section{Implementation Details} 195 | 196 | Here we will explain some important parts of our implementations which are directly taken from the lhs files. The explanation is in the blog style of writing. 197 | 198 | Lets try to capture the transaction into a data-type first. Here 199 | we are implementing a very basic version of transactions. So our 200 | transaction system only provide two type of operations to be performed 201 | on the file. 202 | 203 | \begin{enumerate} 204 | \item \textbf{ReadBlock} is to read the data from the given block number. 205 | \item \textbf{WriteBlock} is to write the given data on the block number provide. 206 | \end{enumerate} 207 | 208 | We can also think of adding operations like append block, modify block 209 | etc., but to keep it simple we only support these two basic operations. 210 | Now lets look at the data definition of the File Transaction (FT) data 211 | type. 212 | 213 | \begin{code}[name=Transactions] 214 | -- Transactions.lhs 215 | -- | Transaction Datatype 216 | data FT a = 217 | Done a | 218 | ReadBlock BlockNumber (BS.ByteString -> FT a) | 219 | WriteBlock BlockNumber BS.ByteString (FT a) 220 | \end{code} 221 | 222 | ByteString is made explicit as all the data needs to be serialized to ByteString before writing. 223 | The types of ReadBlock and WriteBlock looks a little odd. We will see later how it helps in actually passing the continuations. 224 | Lets see the monad definiton of the FT datatype. 225 | 226 | \begin{code}[name=Transactions] 227 | -- Transactions.lhs 228 | -- | Monad Definition for the Transaction. 229 | instance Monad FT where 230 | return = Done 231 | m >>= f = case m of 232 | Done a -> f a 233 | ReadBlock bn c -> ReadBlock bn (\i -> c i >>= f) 234 | WriteBlock bn x c -> WriteBlock bn x (c >>= f) 235 | \end{code} 236 | 237 | Here we will see how the types of ReadBlock and WriteBlock actually help 238 | in our continuation passing style of programming. Lets see a simple 239 | example of interface our implementation provides. 240 | Consider the famous banking example for transactions. We want to 241 | transfer x fund from account A to account B. 242 | Lets assume that fund informations of A and B are stored in the same 243 | file at block number a and b respectively. 
244 | First we will see the basic example to deposit amount to an account. 245 | Here is a function which deposits x amount to the given account. 246 | 247 | \begin{code} 248 | deposit a x = do 249 | amount <- ReadBlock a return 250 | WriteBlock a (amount + x) (return ()) 251 | \end{code} 252 | 253 | Lets see how this is translated to explicit notation without do 254 | notation. 255 | 256 | \begin{code}[stepnumber=2] 257 | ReadBlock a return >>= (\amount -> WriteBlock a 258 | (amount +x) (return())) 259 | ReadBlock a (\i -> return i >>= 260 | \amount -> WriteBlock a (amount + x) (return())) 261 | ReadBlock a (\i -> Done i >>= 262 | \amount -> WriteBlock a (amount + x) (return())) 263 | ReadBlock a (\amount -> 264 | WriteBlock a (amount + x) (return())) 265 | \end{code} 266 | 267 | Looks like it got transformed to whatever we wanted. This is how continuation passing style actually works. 268 | It was a little frustrating to write return while writing ReadBlock and 269 | WriteBlock. So lets define some helpers to help us avoiding the 270 | repetitions. 271 | 272 | \begin{code}[name=Transactions] 273 | -- Transactions.lhs 274 | -- | readBlockT to be used inside the FT Monad 275 | readBlockT :: BlockNumber -> FT BS.ByteString 276 | readBlockT = flip ReadBlock return 277 | -- | writeBlockT to be used inside the FT Monad 278 | writeBlockT :: BlockNumber -> BS.ByteString -> FT () 279 | writeBlockT v x = WriteBlock v x $ return () 280 | \end{code} 281 | 282 | Now the above deposit function can easily be written as: 283 | 284 | \begin{code} 285 | deposit a x = do 286 | amount <- readBlockT a 287 | writeBlockT a (amount + x) 288 | \end{code} 289 | 290 | Now we want to actually perform the transactions satisfying all the ACID 291 | guarantees. So we need to write a function to actually convert our 292 | Transactions from FT monad to IO monad and perform them. 293 | According to the semantics of a transaction, a transaction can either 294 | fail or succeed. So we should provide atleast two types of functions to 295 | run a transaction which are as follows: 296 | 297 | \begin{code}[name=Transactions] 298 | -- Transactions.lhs 299 | -- | Runs the given transaction on the file. 300 | -- Transaction may fail in which case it returns Nothing. 301 | runTransaction :: FT a -> TFile -> IO (Maybe a) 302 | runTransaction = runT Nothing False 303 | -- | Runs the transaction on the file. 304 | -- If transaction fails then reruns it 305 | -- with higher priority. 306 | retryTransaction :: FT a -> TFile -> IO a 307 | retryTransaction ft tFile = fromJust <$> 308 | runT Nothing True ft tFile 309 | \end{code} 310 | 311 | At this point before implementing anything else we are interested in how 312 | we will be actually using them. Here I will also introduce you to the 313 | power of composing two transactions and running them as one. Lets 314 | comeback to our backing example. 315 | 316 | \begin{code}[name=Transactions] 317 | transfer a b x = do 318 | deposit a (-x) 319 | deposit b x 320 | \end{code} 321 | 322 | Here is the function to remove x amount from account A and deposit it to 323 | the account B. We have implemented a very loose semantics here as to not 324 | check if A's balance is less than 0 etc.. I just wanted to show the power 325 | of composing functions. Now we can just do runTransaction on transfer to 326 | run this transaction. 
327 | 328 | \begin{code}[name=Transactions] 329 | -- Transactions.lhs 330 | runTransaction (transfer a b 100) 331 | \end{code} 332 | 333 | The semantics of runTransaction automatically 334 | takes care of all the possible failures and roll-back in case of 335 | transaction failure. 336 | 337 | runTransaction actually has to take care of lots of things: 338 | 339 | \begin{itemize} 340 | \item Gets the file Version at which the transaction started. 341 | \item Performs the transactions on the Journal File 342 | \item Then tries to commit the transaction.Checks the present version against the version at which it started. Here comes the idea of space efficient Bloom filters which are actually used to check whether the current transaction should commit successfully or not. This operation is in a critical section as it might leave the file in inconsistent state if it is not isolated. 343 | \item return the output if transaction succeeds otherwise adds the failed transactions to the failed queue. If retryTransaction was used then it again tries to perform the transaction. 344 | \end{itemize} 345 | 346 | All the journals of the transactions get aggregated over time which might result in poor read performances over time. So we need to actually checkpoint the committed journals back to the database file. 347 | 348 | \begin{code}[name=Transactions] 349 | -- Transactions.lhs 350 | sequencer :: TFile -> IO () 351 | \end{code} 352 | 353 | Sequencer actually does this. It checks for the committed journal files, writes them to the actual database and removes them. 354 | 355 | \section{Future Improvements} 356 | None of the implementation is good for all the purposes. This implementation is good when we have lots of not colliding write transactions. If too many transaction collide and fail then we get poor concurrency. Thus other goal would be to add locking transactions which actually perform the write transactions sequentially similar to sqlite. 357 | Other step will be in the direction of heavily testing the implementation. As it is always very difficult to test alone concurrent applications, we on the other hand also have lots of file operations. The quickcheck library in Haskell provides very good testing mechanism but it requires to separate the backed from the IO monad and to provide an abstraction over it. 358 | Here is our basic approach of creating a pure interface to IO which actually don't perform IO but simulate the file-system. 359 | 360 | Here the basic idea is to capture the Backend in a typeclass. We will be exploiting the type-families extension of Haskell to separate backend from the actual transaction implementation. This has great advantage to replace our backend with whatever kind we want. 361 | 362 | \begin{code}[name=Backend,firstnumber=1] 363 | -- Backend.hs 364 | class (Show a,Monad m) => Backend b m a | b -> m where 365 | data Handle :: * -> * 366 | type BlockNumber :: * 367 | open :: FilePath -> Mode -> m (Handle b) 368 | close :: Handle b -> m () 369 | readBlock ::(Eq BlockNumber)=>Handle b->BlockNumber->m a 370 | writeBlock ::(Eq BlockNumber)=>Handle b->BlockNumber->a->m () 371 | sync :: Handle b -> m () 372 | \end{code} 373 | 374 | We have added the basic functions which we need in the transaction implementation. This greatly condense the side effects possible to only these functions. 375 | 376 | Here is the pure implementation of the file system in terms of state monad. 
377 | 378 | \begin{code}[name=Backend,firstnumber=last] 379 | -- Backend.hs 380 | instance Show a => Backend (TestDisk a) (State (TestDisk a)) a where 381 | data Handle (TestDisk a) = Handle FilePath 382 | type BlockNumber = Int 383 | open fp m = do 384 | disk <- get 385 | put $ openFile disk fp m 386 | return $ Handle fp 387 | close (Handle fp) = do 388 | disk <- get 389 | put $ closeFile disk fp 390 | readBlock (Handle fp) bn = do 391 | t <- get 392 | case readB t fp bn buffers of 393 | Nothing -> error "Block Not Found" 394 | Just a -> return a 395 | writeBlock (Handle fp) bn a = do 396 | t <- get 397 | put $ writeB t fp bn a 398 | sync (Handle fp) = do 399 | t <- get 400 | put $ flushBuffer t fp 401 | \end{code} 402 | 403 | Such an implementation allows us to test lots of the properties which are otherwise difficult to test. It also helps in explicitly specifying what kind of expectation and assumptions you make on your backend. 404 | 405 | The very simple representation of the disk simulated as a map between FilePath and File. 406 | We also have buffers to simulate sync. This will help to simulate power failures and hardware crashes as the data present in the buffers are lost but not the disk. \\ 407 | The File representation is very simple. Just a map between block number and the data present at that location. 408 | 409 | \begin{code}[name=Backend] 410 | -- Backend.hs 411 | data TestDisk a = TestDisk { 412 | disk :: M.Map FilePath (File a) 413 | , buffers :: M.Map FilePath (File a) 414 | , bufferSize :: Int -- Current buffer Size 415 | , openFiles :: M.Map FilePath Mode 416 | } deriving (Eq,Show) 417 | data File a = File { 418 | blocks :: M.Map Int a 419 | , size :: Int 420 | } deriving (Eq, Show) 421 | 422 | data Mode = ReadWrite | Read | Write deriving (Eq,Show) 423 | \end{code} 424 | 425 | This was a very basic structure of file system which we will be using to write test-cases using quick check. Another major thing left to abstract is concurrency and IORefs. This will be our next focus of work in the future. Another area of focus will be into optimizing the implementation and comparing its performance with sqlite. This will greatly improve our understanding whether Haskell is suitable for writing industrial strength databases. 426 | 427 | 428 | \bibliographystyle{IEEEbib} 429 | \begin{thebibliography}{10} 430 | \bibitem[1]{stm}Anthony Discolo, Tim Harris, Simon Marlow, Simon Peyton Jones, and Satnam Singh. Lock-free data structures using STMs in Haskell. In Eighth International Symposium on Functional and Logic Programming (FLOPS’06), April 2006. 431 | \bibitem[2]{sqlite}Atomic Commit in sqlite, \url{http://www.sqlite.org/atomiccommit.html} 432 | \bibitem[3]{bloomfilter}Bloom Filter, \url{http://en.wikipedia.org/wiki/Bloom_filter} 433 | \bibitem[4]{WAL}Write-Ahead Logging, \url{http://www.sqlite.org/draft/wal.html} 434 | \end{thebibliography} 435 | 436 | \pagebreak 437 | 438 | 439 | 440 | \begin{figure}[htb] 441 | \centering 442 | \includegraphics[angle=90]{1} 443 | \end{figure} 444 | 445 | \pagebreak 446 | 447 | \begin{figure}[htb] 448 | \centering 449 | \includegraphics[angle=90]{2} 450 | \end{figure} 451 | 452 | \pagebreak 453 | 454 | \begin{figure}[htb] 455 | \centering 456 | \includegraphics[angle=90]{2} 457 | \end{figure} 458 | 459 | \pagebreak 460 | 461 | \end{document} 462 | --------------------------------------------------------------------------------