├── src ├── Makefile ├── Process │ ├── Makefile │ ├── Timer.hs │ ├── Peer │ │ ├── Sender.hs │ │ ├── Receiver.hs │ │ └── SenderQ.hs │ ├── Listen.hs │ ├── DirWatcher.hs │ ├── FS.hs │ ├── Console.hs │ ├── TorrentManager.hs │ ├── Status.hs │ ├── PeerMgr.hs │ └── ChokeMgr.hs ├── AdaptGhcVersion.hs ├── Tracer.hs ├── Version.hs.in ├── Digest.hs ├── Test.hs ├── TestInstance.hs ├── Channels.hs ├── Data │ ├── PendingSet.hs │ ├── Queue.hs │ └── PieceSet.hs ├── RateCalc.hs ├── Process.hs ├── Torrent.hs ├── Supervisor.hs ├── Combinatorrent.hs ├── FS.hs └── Protocol │ ├── BCode.hs │ └── Wire.hs ├── INSTALL.md ├── testsuite ├── Makefile └── testfile.txt.torrent ├── doc ├── trackerstate.dot ├── Makefile ├── timer.dot ├── PHierachy.dot ├── git.md ├── PieceManager.dot ├── processes.dot ├── Peer.dot ├── on-hieraches.md ├── process-transformers.lhs ├── piecemanager.lhs └── haskell-vs-erlang.mkd ├── AUTHORS ├── tools ├── Makefile ├── oprofile.sh ├── runCombinatorrent.sh ├── postproc.hs └── visualize_stats.R ├── .gitignore ├── configure ├── .travis.yml ├── Setup.lhs ├── Makefile ├── LICENSE ├── TODO.md ├── Combinatorrent.cabal └── README.md /src/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd .. && $(MAKE) -------------------------------------------------------------------------------- /src/Process/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd .. && $(MAKE) 3 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | Installation instructions 2 | ========================= 3 | 4 | Write me! 5 | -------------------------------------------------------------------------------- /testsuite/Makefile: -------------------------------------------------------------------------------- 1 | dummytest: 2 | ../dist/build/Combinatorrent/Combinatorrent testfile.txt.torrent 3 | 4 | -------------------------------------------------------------------------------- /testsuite/testfile.txt.torrent: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlouis/combinatorrent/HEAD/testsuite/testfile.txt.torrent -------------------------------------------------------------------------------- /doc/trackerstate.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | Stopped; 3 | Started; 4 | Completed; 5 | 6 | Stopped -> Started; 7 | Started -> Stopped; 8 | Started -> Completed; 9 | Completed -> Stopped; 10 | } -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Alex Mason 2 | Astro 3 | Jesper Louis Andersen 4 | John Gunderman 5 | Thomas Christensen 6 | -------------------------------------------------------------------------------- /src/AdaptGhcVersion.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module AdaptGhcVersion 3 | ( 4 | Monoid (..), (<$>), (<*>), pure 5 | ) where 6 | 7 | import Control.Applicative ((<$>), (<*>), pure) 8 | import Data.Monoid (Monoid (..)) 9 | 10 | -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | 3 | all: 4 | ./postproc.hs present 
stat_db.txt > stat_db.csv 5 | R --no-save < visualize_stats.R 6 | 7 | push: 8 | cp *.png /home/jlouis/Projects/combinatorrent-gh-pages/img/ 9 | cp *.svg /home/jlouis/Projects/combinatorrent-gh-pages/img/ 10 | 11 | 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | /dist 3 | /doc/*.html 4 | /doc/*.pdf 5 | /doc/*.png 6 | /doc/processes.png 7 | /gen-doc 8 | *.hi 9 | Main 10 | *.o 11 | *.orig 12 | /src/Version.hs 13 | *.swp 14 | *.swo 15 | /tags 16 | /TAGS 17 | /tools/stat_db.txt 18 | /tools/stat_db.csv 19 | /tools/*.png 20 | /tools/*.svg 21 | report.html 22 | /tools/stat_db.backup.txt 23 | .hpc 24 | .cabal-sandbox/ 25 | cabal.sandbox.config 26 | -------------------------------------------------------------------------------- /tools/oprofile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -x 3 | 4 | sudo opcontrol --setup --vmlinux=/usr/src/linux/vmlinux 5 | sudo opcontrol --init 6 | sudo opcontrol --event=CPU_CLK_UNHALTED:100000:0x00:0:1 --event=L2_LINES_IN:2000:0:0:1 7 | sudo opcontrol --reset 8 | sudo opcontrol --start --separate=thread,library 9 | 10 | ./Combinatorrent "$@" 11 | 12 | sudo opcontrol --dump 13 | sudo opcontrol --shutdown 14 | -------------------------------------------------------------------------------- /src/Tracer.hs: -------------------------------------------------------------------------------- 1 | module Tracer 2 | ( Tracer 3 | , new 4 | , trace 5 | ) 6 | where 7 | 8 | data Tracer a = Tracer [a] [a] Int Int 9 | 10 | instance Show a => Show (Tracer a) where 11 | show (Tracer cur old _ _) = show (cur ++ old) 12 | 13 | new :: Int -> Tracer a 14 | new x = Tracer [] [] 0 x 15 | 16 | trace :: a -> Tracer a -> Tracer a 17 | trace msg (Tracer cur old sz l) 18 | | sz == l = Tracer [msg] cur 0 l 19 | | otherwise = Tracer (msg : cur) old (sz+1) l 20 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | 3 | all: processes.pdf timer.pdf PieceManager.pdf Peer.pdf PHierachy.pdf \ 4 | piecemanager.html on-hieraches.html process-transformers.html \ 5 | processes.png PHierachy.png 6 | 7 | %.pdf: %.dot 8 | dot -Tpdf $< > $@ 9 | 10 | %.png: %.dot 11 | dot -Tpng $< > $@ 12 | 13 | %.html: %.lhs 14 | pandoc --smart --from=markdown+lhs --to=html+lhs $< > $@ 15 | 16 | %.html: %.md 17 | pandoc --smart --from=markdown --to=html $< > $@ 18 | 19 | clean: 20 | rm -f *.pdf 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/Version.hs.in: -------------------------------------------------------------------------------- 1 | -- | Combinatorrent version 2 | module Version (protoVersion, version) where 3 | 4 | -- | The current version of the Combinatorrent protocol string. This is bumped 5 | -- whenever we make a radical change to the protocol communication or fix a grave bug. 6 | -- It provides a way for trackers to disallow versions of the client which are misbehaving. 
7 | protoVersion :: String 8 | protoVersion = "d001" 9 | 10 | githead :: String 11 | githead = "@GITHEAD@" 12 | 13 | version :: String 14 | version = githead 15 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Simple configuration script. 4 | 5 | ## Git version 6 | GITHEAD=$(awk '/^version:/ {print $2}' Combinatorrent.cabal) 7 | if test -d "${GIT_DIR:-.git}" ; then 8 | GITHEAD=`git describe 2>/dev/null` 9 | 10 | if test -z ${GITHEAD} ; then 11 | GITHEAD=`git rev-parse HEAD` 12 | fi 13 | 14 | if test -n "`git diff-index -m --name-only HEAD`" ; then 15 | GITHEAD="${GITHEAD}-dirty" 16 | fi 17 | fi 18 | 19 | sed -e "s/@GITHEAD@/$GITHEAD/g" src/Version.hs.in > src/Version.hs 20 | 21 | 22 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # language: haskell 2 | 3 | # See http://www.reddit.com/r/haskell/comments/1os3f6/how_to_use_travisci_with_multiple_ghc_versions/ 4 | 5 | env: 6 | - GHCVER=7.6.3 7 | - GHCVER=7.8.4 8 | - GHCVER=7.10.3 9 | - GHCVER=8.0.1 10 | 11 | before_install: 12 | - sudo add-apt-repository -y ppa:hvr/ghc 13 | - sudo apt-get update 14 | - sudo apt-get install cabal-install-1.24 ghc-$GHCVER 15 | - export PATH=/opt/cabal/bin:/opt/ghc/$GHCVER/bin:$PATH 16 | 17 | install: 18 | - cabal-1.24 update 19 | - cabal-1.24 install --only-dependencies 20 | 21 | script: 22 | - cabal-1.24 configure 23 | - cabal-1.24 build 24 | - cabal-1.24 haddock 25 | - cabal-1.24 sdist 26 | -------------------------------------------------------------------------------- /src/Process/Timer.hs: -------------------------------------------------------------------------------- 1 | -- | The timer module is responsible for timing in the project. With 2 | -- the timer system, we can register timers in the future and then 3 | -- we can get a tick triggered at the point in time where we need to 4 | -- act. This allows us to postpone events into the future at a 5 | -- designated time. 6 | -- 7 | module Process.Timer 8 | ( registerSTM 9 | ) 10 | 11 | where 12 | 13 | import Control.Concurrent 14 | import Control.Concurrent.STM 15 | import Control.Monad.Trans 16 | 17 | registerSTM :: MonadIO m => Int -> TChan a -> a -> m ThreadId 18 | registerSTM secs c m = liftIO $ forkIO $ {-# SCC "Timer" #-} do 19 | threadDelay (secs * 1000000) 20 | atomically $ writeTChan c m 21 | -------------------------------------------------------------------------------- /src/Digest.hs: -------------------------------------------------------------------------------- 1 | -- | Simple abstraction for message digests 2 | {-# LANGUAGE TypeSynonymInstances, CPP #-} 3 | module Digest 4 | ( Digest 5 | , digest 6 | , digestBS 7 | ) 8 | where 9 | 10 | import qualified Data.ByteString as B 11 | 12 | import qualified Data.ByteString.Lazy as L 13 | import qualified Crypto.Hash.SHA1 as SHA1 14 | 15 | -- Consider newtyping this 16 | type Digest = B.ByteString 17 | 18 | #if ! MIN_VERSION_bytestring(0,10,0) 19 | -- instance was introduced 20 | instance NFData Digest 21 | #endif 22 | 23 | digest :: L.ByteString -> B.ByteString 24 | digest bs = {-# SCC "sha1_digest" #-} SHA1.hashlazy bs 25 | 26 | digestBS :: B.ByteString -> B.ByteString 27 | digestBS bs = digest . 
L.fromChunks $ [bs] 28 | -------------------------------------------------------------------------------- /doc/timer.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | check [label="Check Head of Queue\nStore time as t", shape=box]; 3 | waitForHead [label="Wait until head triggers", shape=box]; 4 | tick [label="Send Tick"]; 5 | remove [label="Remove Head from queue"]; 6 | ext_event [label="*", shape=point]; 7 | ext_event_l [label="*", shape=point]; 8 | eventEnters [label="Event Enters"]; 9 | elapsedCalc [label="Calculate time elapsed"]; 10 | advanceQueue [label="Advance Queue with time"]; 11 | insertItem [label="Insert New Item\nIn Queue"]; 12 | 13 | check -> waitForHead; 14 | check -> eventEnters; 15 | ext_event -> eventEnters [style=dotted]; 16 | 17 | waitForHead -> tick; 18 | tick -> remove; 19 | tick -> ext_event_l [style=dotted]; 20 | remove -> check; 21 | 22 | eventEnters -> elapsedCalc -> advanceQueue -> insertItem -> check; 23 | } -------------------------------------------------------------------------------- /doc/PHierachy.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | label="Process Hierachy"; 3 | 4 | node [shape=ellipse]; 5 | S0; 6 | S1; 7 | S2; 8 | SPeer1; 9 | SPeer2; 10 | SPeer3; 11 | 12 | node [shape=box]; 13 | Main; Timer; Console; FS; Tracker; Status; PeerMgr; ChokeMgr; PieceMgr; 14 | 15 | S0 -> Main; 16 | S0 -> S1; 17 | 18 | S1 -> Timer; 19 | S1 -> Console; 20 | S1 -> FS; 21 | S1 -> Tracker; 22 | S1 -> Status; 23 | S1 -> PeerMgr; 24 | S1 -> ChokeMgr; 25 | S1 -> PieceMgr; 26 | S1 -> S2 27 | 28 | S2 -> SPeer1; 29 | SPeer1 -> P1Receiver; 30 | SPeer1 -> P1SendQ; 31 | SPeer1 -> P1PeerP; 32 | SPeer1 -> P1Sender; 33 | 34 | S2 -> SPeer2; 35 | SPeer2 -> P2Receiver; 36 | SPeer2 -> P2SendQ; 37 | SPeer2 -> P2PeerP; 38 | SPeer2 -> P2Sender; 39 | 40 | S2 -> SPeer3; 41 | SPeer3 -> P3Receiver; 42 | SPeer3 -> P3SendQ; 43 | SPeer3 -> P3PeerP; 44 | SPeer3 -> P3Sender; 45 | } -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env runhaskell 2 | 3 | In principle, we could do with a lot less than autoconfUserhooks, but simpleUserHooks 4 | is not running 'configure'. 5 | 6 | We will need Distribution.Simple to do the heavyweight lifting, and we will need some filePath magic. 7 | 8 | > import System.FilePath 9 | > import System.Process 10 | > import Distribution.Simple 11 | > import Distribution.Simple.LocalBuildInfo 12 | > import Distribution.PackageDescription 13 | 14 | The main program is just to make Cabal lift it. But we will override testing. 
15 | 16 | > main = defaultMainWithHooks hooks 17 | > where hooks = autoconfUserHooks { runTests = runTests' } 18 | 19 | Running tests is to call Combinatorrent with its parameters for tests: 20 | 21 | > runTests' :: Args -> Bool -> PackageDescription -> LocalBuildInfo -> IO () 22 | > runTests' _ _ _ lbi = system testprog >> return () 23 | > where testprog = (buildDir lbi) "Combinatorrent" "Combinatorrent --tests" 24 | -------------------------------------------------------------------------------- /src/Process/Peer/Sender.hs: -------------------------------------------------------------------------------- 1 | module Process.Peer.Sender 2 | ( start ) 3 | where 4 | 5 | import Control.Concurrent 6 | import Control.Concurrent.STM 7 | 8 | import Control.Monad.Reader 9 | 10 | import qualified Data.ByteString.Lazy as L 11 | 12 | import Network.Socket hiding (send, sendTo, recv, recvFrom) 13 | import Network.Socket.ByteString.Lazy 14 | import Prelude hiding (getContents) 15 | 16 | import Process 17 | import Supervisor 18 | 19 | data CF = CF { chan :: TMVar L.ByteString 20 | , sock :: Socket } 21 | 22 | instance Logging CF where 23 | logName _ = "Process.Peer.Sender" 24 | 25 | -- | The raw sender process, it does nothing but send out what it syncs on. 26 | start :: Socket -> TMVar L.ByteString -> SupervisorChannel -> IO ThreadId 27 | start s ch supC = spawnP (CF ch s) () ({-# SCC "Sender" #-} 28 | (cleanupP pgm 29 | (defaultStopHandler supC) 30 | (liftIO $ close s))) 31 | pgm :: Process CF () () 32 | pgm = do 33 | ch <- asks chan 34 | s <- asks sock 35 | _ <- liftIO $ do 36 | r <- atomically $ takeTMVar ch 37 | sendAll s r 38 | pgm 39 | 40 | -------------------------------------------------------------------------------- /src/Test.hs: -------------------------------------------------------------------------------- 1 | {- The Test module provides an interface to test-framework. It is called when --tests 2 | - is supplied on the command line. This file only gathers together various test suites 3 | - from all over the rest of the code and then executes them via test-framework. 4 | -} 5 | module Test (runTests) where 6 | 7 | import System.Environment ( getArgs ) 8 | import Test.Framework 9 | import Test.Framework.Providers.QuickCheck2 10 | 11 | import qualified Data.PieceSet (testSuite) 12 | import qualified Data.Queue (testSuite) 13 | import qualified Protocol.BCode (testSuite) 14 | import qualified Protocol.Wire (testSuite) 15 | import qualified Process.Peer (testSuite) 16 | 17 | runTests :: IO () 18 | runTests = 19 | do args <- filter (/= "--tests") `fmap` getArgs 20 | flip defaultMainWithArgs args 21 | [ testSuite 22 | , Data.Queue.testSuite 23 | , Data.PieceSet.testSuite 24 | , Protocol.BCode.testSuite 25 | , Protocol.Wire.testSuite 26 | , Process.Peer.testSuite 27 | ] 28 | 29 | testSuite :: Test 30 | testSuite = testGroup "Test test-framework" 31 | [ testProperty "reverse-reverse/id" prop_reversereverse ] 32 | 33 | -- reversing twice a finite list, is the same as identity 34 | prop_reversereverse :: [Int] -> Bool 35 | prop_reversereverse s = (reverse . reverse) s == id s 36 | where _ = s :: [Int] 37 | 38 | -------------------------------------------------------------------------------- /tools/runCombinatorrent.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ## Run combinatorent while gathering statistics. Use this script by symlinking it from the directory 4 | ## where you run tests in and then call it. 
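## A typical invocation looks like this (hypothetical paths and torrent name,
## shown only as an illustration):
##
##   cd ~/torrent-tests
##   ln -s ~/src/combinatorrent/tools/runCombinatorrent.sh .
##   ./runCombinatorrent.sh testfile.txt.torrent
##
## Any arguments are passed straight through to the Combinatorrent binary,
## together with the RTS and statistics options set up below.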
5 | 6 | ## Find the combinatorrent executable 7 | thisscript=$(readlink -f $0) 8 | basedir=$(dirname ${thisscript}) 9 | COMBINATORRENT=${basedir}/../dist/build/Combinatorrent/Combinatorrent 10 | POSTPROC=${basedir}/postproc.hs 11 | 12 | ## Set up default locations for statistics 13 | DBFILE=${basedir}/stat_db.txt 14 | STATDIR='/tmp/combinatorrent' 15 | 16 | RTSOPTS="+RTS -A3m -t${STATDIR}/Combinatorrent.rts_stat --machine-readable -RTS" 17 | STATOPTS="-S ${STATDIR}/Combinatorrent.stat" 18 | 19 | ct () { 20 | date +%s >> ${STATDIR}/Combinatorrent.times 21 | touch ${STATDIR}/Combinatorrent.stat 22 | ${COMBINATORRENT} ${RTSOPTS} ${STATOPTS} "$@" 23 | date +%s >> ${STATDIR}/Combinatorrent.times 24 | } 25 | 26 | postproc () { 27 | ${POSTPROC} gather ${STATDIR}/Combinatorrent.rts_stat ${STATDIR}/Combinatorrent.stat \ 28 | ${STATDIR}/Combinatorrent.times >> ${DBFILE} 29 | } 30 | 31 | cleanfiles () { 32 | rm -f ${STATDIR}/Combinatorrent.times 33 | rm -f ${STATDIR}/Combinatorrent.rts_stat 34 | rm -f ${STATDIR}/Combinatorrent.stat 35 | } 36 | 37 | mkdir -p ${STATDIR} 38 | cleanfiles 39 | ct "$@" 40 | postproc 41 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test build clean rebuild local-install haddock hlint tags conf dist-rebuild conf-nodebug conf-ts 2 | build: 3 | runghc Setup.lhs build 4 | 5 | clean: 6 | runghc Setup.lhs clean 7 | 8 | test: build 9 | runghc Setup.lhs test 10 | 11 | conf: 12 | runghc Setup.lhs configure --user --enable-library-profiling --enable-executable-profiling --enable-optimization=2 13 | 14 | conf-debug: 15 | runghc Setup.lhs configure --flags="debug" --user --enable-library-profiling --enable-executable-profiling --enable-optimization 16 | 17 | conf-t: 18 | runghc Setup.lhs configure --flags="threaded" --user --enable-library-profiling --enable-optimization=2 19 | 20 | conf-hpc: 21 | runghc Setup.lhs configure --ghc-options=-fhpc --user --enable-library-profiling --enable-executable-profiling --enable-optimization 22 | 23 | conf-no-opt: 24 | runghc Setup.lhs configure --flags="debug" --user --enable-library-profiling --enable-executable-profiling --enable-optimization=0 25 | 26 | conf-ts: 27 | runghc Setup.lhs configure --flags="debug threadscope" --user 28 | 29 | rebuild: configure build 30 | 31 | dist-rebuild: clean local-install 32 | 33 | local-install: 34 | cabal install --prefix=$$HOME --user 35 | 36 | haddock: 37 | runghc Setup.lhs haddock --executables 38 | 39 | hlint: 40 | hlint -r src 41 | 42 | tags: 43 | hothasktags $$(find src -type f -name '*.*hs') > $@ 44 | 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, 2010, Jesper Louis Andersen 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /src/TestInstance.hs: -------------------------------------------------------------------------------- 1 | -- Define a set of test instances of common types 2 | -- Portions of this code is taken from "Real World Haskell" 3 | module TestInstance 4 | () 5 | where 6 | 7 | 8 | import qualified Data.ByteString as B 9 | import qualified Data.ByteString.Lazy as L 10 | 11 | 12 | import Test.QuickCheck 13 | 14 | 15 | {-# LANGUAGE CPP #-} 16 | #if MIN_VERSION_random(1,0,1) 17 | -- random>=1.0.1 is exporting these instances, so don't need to redefine it 18 | #else 19 | 20 | import Data.Word 21 | import System.Random 22 | 23 | integralRandomR :: (Integral a, Integral b, RandomGen g, Num b) => (a, b) -> g -> (b, g) 24 | integralRandomR (a,b) g = case randomR (c,d) g of 25 | (x,h) -> (fromIntegral x, h) 26 | where (c,d) = (fromIntegral a :: Integer, 27 | fromIntegral b :: Integer) 28 | 29 | instance Random Word32 where 30 | randomR = integralRandomR 31 | random = randomR (minBound, maxBound) 32 | 33 | instance Random Word8 where 34 | randomR = integralRandomR 35 | random = randomR (minBound, maxBound) 36 | #endif 37 | 38 | instance Arbitrary L.ByteString where 39 | arbitrary = L.pack `fmap` arbitrary 40 | 41 | instance CoArbitrary L.ByteString where 42 | coarbitrary = coarbitrary . L.unpack 43 | 44 | instance Arbitrary B.ByteString where 45 | arbitrary = B.pack `fmap` arbitrary 46 | 47 | instance CoArbitrary B.ByteString where 48 | coarbitrary = coarbitrary . B.unpack 49 | 50 | -------------------------------------------------------------------------------- /src/Process/Listen.hs: -------------------------------------------------------------------------------- 1 | module Process.Listen 2 | ( start 3 | ) 4 | where 5 | 6 | import Control.Concurrent 7 | import Control.Concurrent.STM 8 | import Control.Exception (bracketOnError) 9 | import Control.Monad.Reader 10 | 11 | import Data.Word 12 | 13 | import Network hiding (accept) 14 | import Network.Socket 15 | import Network.BSD 16 | 17 | import Process 18 | import Process.PeerMgr hiding (start) 19 | import Supervisor 20 | 21 | data CF = CF { peerMgrCh :: PeerMgrChannel } 22 | 23 | instance Logging CF where 24 | logName _ = "Process.Listen" 25 | 26 | start :: Word16 -> PeerMgrChannel -> SupervisorChannel -> IO ThreadId 27 | start port peerMgrC supC = do 28 | spawnP (CF peerMgrC) () ({-# SCC "Listen" #-} catchP (openListen port >>= eventLoop) 29 | (defaultStopHandler supC)) -- TODO: Close socket resource! 
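-- One way to address the TODO above is the cleanupP pattern already used in
-- Process.Peer.Sender, which closes its socket when the process dies. A rough,
-- untested sketch (assuming cleanupP from the Process module is in scope here):
--
--   start port peerMgrC supC =
--       spawnP (CF peerMgrC) () $ do
--           sock <- openListen port
--           cleanupP (eventLoop sock)
--                    (defaultStopHandler supC)
--                    (liftIO $ close sock)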
30 | 31 | openListen :: Word16 -> Process CF () Socket 32 | openListen port = liftIO $ do 33 | proto <- getProtocolNumber "tcp" 34 | bracketOnError 35 | (socket AF_INET Stream proto) 36 | (close) 37 | (\sock -> do 38 | setSocketOption sock ReuseAddr 1 39 | bind sock (SockAddrInet (toEnum $ fromIntegral port) iNADDR_ANY) 40 | listen sock maxListenQueue 41 | return sock 42 | ) 43 | 44 | eventLoop :: Socket -> Process CF () () 45 | eventLoop sockFd = do 46 | c <- asks peerMgrCh 47 | liftIO $ do 48 | conn <- accept sockFd 49 | atomically $ writeTChan c (NewIncoming conn) 50 | eventLoop sockFd 51 | 52 | -------------------------------------------------------------------------------- /src/Channels.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeSynonymInstances #-} 2 | {-# LANGUAGE FlexibleInstances #-} 3 | module Channels 4 | ( Peer(..) 5 | , PeerChokeMsg(..) 6 | , MsgTy(..) 7 | , PeerChannel 8 | , MgrMessage(..) 9 | , MgrChannel 10 | , BandwidthChannel 11 | , TrackerMsg(..) 12 | , TrackerChannel 13 | ) 14 | where 15 | 16 | import Control.Concurrent 17 | import Control.Concurrent.STM 18 | import Control.DeepSeq 19 | 20 | import Network.Socket 21 | 22 | import Protocol.Wire 23 | import Torrent 24 | 25 | data Peer = Peer SockAddr 26 | 27 | data MsgTy = FromPeer (Message, Int) 28 | | FromSenderQ Int -- Always UpRate events 29 | | FromChokeMgr PeerChokeMsg 30 | | TimerTick 31 | 32 | data PeerChokeMsg = ChokePeer 33 | | UnchokePeer 34 | | PieceCompleted PieceNum 35 | | CancelBlock PieceNum Block 36 | 37 | type PeerChannel = TChan MsgTy 38 | 39 | instance NFData PeerChannel where 40 | rnf pc = pc `seq` () 41 | 42 | ---- TRACKER 43 | 44 | -- | Messages to the tracker process 45 | data TrackerMsg = Stop -- ^ Ask the Tracker to stop 46 | | TrackerTick Integer -- ^ Ticking in the tracker, used to contact again 47 | | Start -- ^ Ask the tracker to Start 48 | | Complete -- ^ Ask the tracker to Complete the torrent 49 | type TrackerChannel = TChan TrackerMsg 50 | 51 | data MgrMessage = Connect InfoHash ThreadId PeerChannel 52 | | Disconnect ThreadId 53 | 54 | type MgrChannel = TChan MgrMessage 55 | 56 | -- | A Channel type we use for transferring the amount of data we transferred 57 | type BandwidthChannel = TChan Integer 58 | -------------------------------------------------------------------------------- /tools/postproc.hs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env runhaskell 2 | 3 | import Data.List 4 | import System 5 | 6 | main = do 7 | args <- getArgs 8 | case args of 9 | ["gather", rtsStat, combinatorrentStat, timeStat] -> 10 | gatherStats rtsStat combinatorrentStat timeStat 11 | ["present", database] -> presentStats database 12 | 13 | 14 | gatherStats rtsStat combinatorrentStat timeStat = do 15 | tStat <- readTimes timeStat 16 | cStat <- readCombinatorrentStat combinatorrentStat 17 | rStat <- readRtsStat rtsStat 18 | putStrLn $ show (tStat ++ cStat ++ rStat) 19 | 20 | 21 | readRtsStat :: FilePath -> IO [(String, String)] 22 | readRtsStat fp = do 23 | cts <- readFile fp 24 | return $ read . unlines . tail . lines $ cts 25 | 26 | readCombinatorrentStat :: FilePath -> IO [(String, String)] 27 | readCombinatorrentStat fp = do 28 | cts <- readFile fp 29 | let d = read cts :: [(String, String)] 30 | return $ map (\(k, v) -> (k, show v)) d 31 | 32 | readTimes :: FilePath -> IO [(String, String)] 33 | readTimes timeStat = do 34 | contents <- readFile timeStat 35 | let [s, e] = (map read . 
lines $ contents) :: [Integer] 36 | return [("start_time", show s) 37 | ,("end_time" , show e)] 38 | 39 | presentStats db = do 40 | cts <- readFile db 41 | let ls = map read . lines $ cts 42 | putStrLn "#Start\tEnd\tMaxBytesUsed\tPeakMegabytesAlloc\tMutCPU\tGCCPU\tUploaded\tDownloaded" 43 | let formatted = map (format ["start_time", "end_time", "max_bytes_used", 44 | "peak_megabytes_allocated", 45 | "mutator_cpu_seconds", 46 | "GC_cpu_seconds", 47 | "uploaded", "downloaded"]) ls 48 | mapM_ putStrLn formatted 49 | 50 | format :: [String] -> [(String, String)] -> String 51 | format cols row = concat $ intersperse "\t" entries 52 | where entries = map (\c -> case find ((==c) . fst) row of 53 | Nothing -> error "Column doesn't exist" 54 | Just x -> snd x) cols 55 | 56 | -------------------------------------------------------------------------------- /doc/git.md: -------------------------------------------------------------------------------- 1 | What are the branches in the project? 2 | ===================================== 3 | 4 | (TODO: Convert the rest of this into markdown) 5 | 6 | This is stolen totally from git by Junio C. Hamano. I like it, 7 | so this is how we rock the main repository. 8 | 9 | The policy. 10 | 11 | - Feature releases are numbered as vX.Y and are meant to 12 | contain bugfixes and enhancements in any area, including 13 | functionality, performance and usability, without regression. 14 | 15 | - Maintenance releases are numbered as vX.Y.W and are meant 16 | to contain only bugfixes for the corresponding vX.Y feature 17 | release and earlier maintenance releases vX.Y.V (V < W). 18 | 19 | - 'master' branch is used to prepare for the next feature 20 | release. In other words, at some point, the tip of 'master' 21 | branch is tagged with vX.Y. 22 | 23 | - 'maint' branch is used to prepare for the next maintenance 24 | release. After the feature release vX.Y is made, the tip 25 | of 'maint' branch is set to that release, and bugfixes will 26 | accumulate on the branch, and at some point, the tip of the 27 | branch is tagged with vX.Y.1, vX.Y.2, and so on. 28 | 29 | - 'next' branch is used to publish changes (both enhancements 30 | and fixes) that (1) have worthwhile goal, (2) are in a fairly 31 | good shape suitable for everyday use, (3) but have not yet 32 | demonstrated to be regression free. New changes are tested 33 | in 'next' before merged to 'master'. 34 | 35 | - 'pu' branch is used to publish other proposed changes that do 36 | not yet pass the criteria set for 'next'. 37 | 38 | - The tips of 'master', 'maint' and 'next' branches will always 39 | fast forward, to allow people to build their own 40 | customization on top of them. 41 | 42 | - Usually 'master' contains all of 'maint', 'next' contains all 43 | of 'master' and 'pu' contains all of 'next'. 44 | 45 | - The tip of 'master' is meant to be more stable than any 46 | tagged releases, and the users are encouraged to follow it. 47 | 48 | - The 'next' branch is where new action takes place, and the 49 | users are encouraged to test it so that regressions and bugs 50 | are found before new topics are merged to 'master'. 51 | -------------------------------------------------------------------------------- /src/Process/DirWatcher.hs: -------------------------------------------------------------------------------- 1 | -- | The DirWatcher Process runs a watcher over a directory. It will tell about any change 2 | -- happening inside that directory. 
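-- Concretely, the process polls the watched directory on a ten-second timer
-- (via Process.Timer.registerSTM), keeps the set of ".torrent" files from the
-- previous round as its state, and reports the difference as AddedTorrent and
-- RemovedTorrent messages on the TorrentManager channel.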
3 | module Process.DirWatcher ( 4 | -- * Interface 5 | start 6 | ) 7 | where 8 | 9 | import Control.Concurrent 10 | import Control.Concurrent.STM 11 | 12 | import Control.Monad.Reader 13 | import Control.Monad.State 14 | 15 | import qualified Data.Set as S 16 | 17 | import System.Directory 18 | import System.FilePath 19 | 20 | import Prelude hiding (log) 21 | import Process 22 | import Process.TorrentManager hiding (start) 23 | import Process.Timer 24 | import Supervisor 25 | 26 | 27 | data CF = CF { reportCh :: TorrentMgrChan -- ^ Channel for reporting directory changes 28 | , tickCh :: TChan () 29 | , dirToWatch :: FilePath } 30 | 31 | type ST = S.Set FilePath 32 | 33 | instance Logging CF where 34 | logName _ = "Process.DirWatcher" 35 | 36 | start :: FilePath -- ^ Path to watch 37 | -> TorrentMgrChan -- ^ Channel to return answers on 38 | -> SupervisorChannel 39 | -> IO ThreadId 40 | start fp chan supC = do 41 | tickC <- newTChanIO 42 | _ <- registerSTM 10 tickC () 43 | spawnP (CF chan tickC fp) S.empty 44 | ({-# SCC "DirWatcher" #-} 45 | catchP pgm (defaultStopHandler supC)) 46 | where pgm = do 47 | tc <- asks tickCh 48 | () <- liftIO . atomically $ do readTChan tc 49 | processDirectory 50 | _ <- registerSTM 10 tc () 51 | pgm 52 | 53 | processDirectory :: Process CF ST () 54 | processDirectory = do 55 | watchDir <- asks dirToWatch 56 | files <- liftIO $ map (watchDir ) `fmap` getDirectoryContents watchDir 57 | let torrents = S.fromList $ filter (\fp -> (== ".torrent") $ snd . splitExtension $ fp) files 58 | running <- get 59 | let (added, removed) = (S.toList $ S.difference torrents running, 60 | S.toList $ S.difference running torrents) 61 | msg = (map AddedTorrent added ++ map RemovedTorrent removed) 62 | when (msg /= []) 63 | (do rc <- asks reportCh 64 | liftIO . 
atomically $ writeTChan rc msg 65 | -- Make ready for next iteration 66 | put torrents) 67 | 68 | -------------------------------------------------------------------------------- /doc/PieceManager.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | node [fontname="URW Gothic L",fontsize=10]; 3 | edge [fontname="URW Gothic L",fontsize=8]; 4 | 5 | labeljust = l; 6 | labelloc = t; 7 | 8 | fontsize = 24; 9 | fontname="URW Gothic L"; 10 | label = "Piece Manager FlowChart"; 11 | 12 | 13 | // External communication 14 | { rank=same; 15 | peer [label="Peer", shape=ellipse,color=deepskyblue4]; 16 | loop [label="Loop/Sync"]; 17 | } 18 | 19 | fs [label="FS", shape=ellipse,color=deepskyblue4]; 20 | fs -> checkp [style=dotted,color=deepskyblue4]; 21 | storeb -> fs [style=dotted,color=deepskyblue4]; 22 | checkp -> fs [style=dotted,color=deepskyblue4]; 23 | 24 | // Internal state changes 25 | peer -> grab [style=dotted,color=deepskyblue4]; 26 | trackb -> peer [style=dotted,color=deepskyblue4]; 27 | peer -> blockc [style=dotted,color=deepskyblue4]; 28 | 29 | node [shape=box]; 30 | 31 | grab [label="Grab N blocks"]; 32 | grabProgress [label="Grab from\nin Progress"]; 33 | grabPending [label="Grab from\nPending"]; 34 | 35 | trackb [label="Track Grabs"]; 36 | 37 | blockc [label="Block Completed"]; 38 | storeb [label="Store Block"]; 39 | pieced [label="Piece Done?", shape=diamond]; 40 | checkp [label="Check Piece", shape=diamond]; 41 | markc [label="Mark Piece\ncomplete"]; 42 | putbackp [label="Putback Piece"]; 43 | 44 | loop -> blockc; 45 | blockc -> storeb; 46 | storeb -> pieced; 47 | pieced -> loop [label="No"]; 48 | pieced -> checkp [label="Yes"]; 49 | checkp -> markc [label="Ok"]; 50 | markc -> loop; 51 | checkp -> putbackp [label="Fail"]; 52 | putbackp -> loop; 53 | 54 | loop -> grab; 55 | 56 | grab -> grabProgress; 57 | grabProgress -> trackb [label="Enough\npieces"]; 58 | grabProgress -> grabPending [label="None progress\neligible"]; 59 | grabPending -> grabProgress [label="Found pending\nadd to progress"]; 60 | grabPending -> trackb [label="Exhausted"]; 61 | 62 | trackb -> loop; 63 | 64 | subgraph cluster_piece_state { 65 | fontsize = 16; 66 | label = "Piece States"; 67 | 68 | node [shape=ellipse]; 69 | pending [label="PENDING"]; 70 | in_progress [label="IN PROGRESS\n(BPending, BDone, n/k)"]; 71 | done [label="DONE"]; 72 | 73 | pending -> in_progress; 74 | in_progress -> pending [label="Fail"]; 75 | in_progress -> done [label="Ok"]; 76 | 77 | } 78 | } -------------------------------------------------------------------------------- /doc/processes.dot: -------------------------------------------------------------------------------- 1 | graph { 2 | 3 | node [shape=ellipse]; 4 | { rank=same; P1; P2; P3; } 5 | node [shape=box]; 6 | Tracker; 7 | { rank = same; 8 | PeerMgr; 9 | ChokeMgr; 10 | } 11 | 12 | 13 | subgraph cluster_p1 { 14 | label="Peer #1"; 15 | 16 | P1Main [label="PeerP"]; 17 | P1Receiver [label="Peer_Receiver"]; 18 | P1SendQ [label="Peer SendQueue"]; 19 | P1Sender [label="Peer Sender"]; 20 | 21 | P1Receiver -- P1Main; 22 | P1Main -- P1SendQ -- P1Sender; 23 | } 24 | 25 | subgraph cluster_p2 { 26 | label="Peer #2"; 27 | 28 | P2Main [label="PeerP"]; 29 | P2Receiver [label="Peer_Receiver"]; 30 | P2SendQ [label="Peer SendQueue"]; 31 | P2Sender [label="Peer Sender"]; 32 | 33 | P2Receiver -- P2Main; 34 | P2Main -- P2SendQ -- P2Sender; 35 | } 36 | 37 | subgraph cluster_p3 { 38 | label="Peer #3"; 39 | 40 | P3Main [label="PeerP"]; 41 | P3Receiver 
[label="Peer_Receiver"]; 42 | P3SendQ [label="Peer SendQueue"]; 43 | P3Sender [label="Peer Sender"]; 44 | 45 | P3Receiver -- P3Main; 46 | P3Main -- P3SendQ -- P3Sender; 47 | } 48 | 49 | Status; 50 | FS; 51 | Main; 52 | Console; 53 | Timer; 54 | PieceMgr; 55 | 56 | Listener; 57 | 58 | node [shape=diamond]; 59 | HTTP; 60 | Socket; 61 | 62 | edge [style=dotted,color=deepskyblue4]; 63 | HTTP -- Tracker; 64 | P1 -- Socket; 65 | P2 -- Socket; 66 | P3 -- Socket; 67 | Listener-- Socket; 68 | 69 | edge [style=solid,color=black]; 70 | 71 | Tracker -- PeerMgr; 72 | 73 | Timer -- Tracker; 74 | 75 | PeerMgr -- Status; 76 | ChokeMgr -- P1; 77 | ChokeMgr -- P2; 78 | ChokeMgr -- P3; 79 | PeerMgr -- ChokeMgr; 80 | 81 | PHub [shape=point]; 82 | P1 -- PHub [arrowhead=none]; 83 | P2 -- PHub [arrowhead=none]; 84 | P3 -- PHub [arrowhead=none]; 85 | PHub -- PeerMgr; 86 | 87 | PHub_FS [shape=point]; 88 | PHub_FS -- P1; 89 | PHub_FS -- P2; 90 | PHub_FS -- P3; 91 | PHub_FS -- FS; 92 | 93 | 94 | PHub_S [shape=point]; 95 | P1 -- PHub_S [arrowhead=none]; 96 | P2 -- PHub_S [arrowhead=none]; 97 | P3 -- PHub_S [arrowhead=none]; 98 | PHub_S -- Status; 99 | 100 | PHub_PM [shape=point]; 101 | 102 | P1 -- PHub_PM [arrowhead=none]; 103 | P2 -- PHub_PM [arrowhead=none]; 104 | P3 -- PHub_PM [arrowhead=none]; 105 | PHub_PM -- PieceMgr; 106 | 107 | PieceMgr -- FS; 108 | 109 | FS -- Status; 110 | 111 | Main -- Console; 112 | Main -- FS; 113 | 114 | Status -- Tracker; 115 | Status -- Main; 116 | 117 | Listener -- P1 [style=dashed,color=darkgreen]; 118 | 119 | } -------------------------------------------------------------------------------- /src/Process/FS.hs: -------------------------------------------------------------------------------- 1 | -- | File system process. Acts as a maintainer for the filesystem in 2 | -- question and can only do single-file torrents. It should be 3 | -- fairly easy to add Multi-file torrents by altering this file and 4 | -- the FS module. 5 | {-# LANGUAGE ScopedTypeVariables #-} 6 | module Process.FS 7 | ( FSPChannel 8 | , FSPMsg(..) 9 | , start 10 | ) 11 | where 12 | 13 | import Control.Concurrent 14 | import Control.Concurrent.STM 15 | import Control.Monad.Reader 16 | import Control.Monad.State 17 | 18 | import Data.Array 19 | import qualified Data.ByteString as B 20 | 21 | import Process 22 | import Torrent 23 | import qualified FS 24 | import Supervisor 25 | 26 | data FSPMsg = CheckPiece PieceNum (TMVar (Maybe Bool)) 27 | | WriteBlock PieceNum Block B.ByteString 28 | | ReadBlock PieceNum Block (TMVar B.ByteString) 29 | 30 | type FSPChannel = TChan FSPMsg 31 | 32 | data CF = CF 33 | { fspCh :: FSPChannel -- ^ Channel on which to receive messages 34 | } 35 | 36 | instance Logging CF where 37 | logName _ = "Process.FS" 38 | 39 | data ST = ST 40 | { fileHandles :: !FS.Handles -- ^ The file we are working on 41 | , pieceMap :: !FS.PieceMap -- ^ Map of where the pieces reside 42 | } 43 | 44 | 45 | -- INTERFACE 46 | ---------------------------------------------------------------------- 47 | 48 | start :: FS.Handles -> FS.PieceMap -> FSPChannel -> SupervisorChannel -> IO ThreadId 49 | start handles pm fspC supC = 50 | spawnP (CF fspC) (ST handles pm) ({-# SCC "FS" #-} 51 | catchP lp (defaultStopHandler supC)) 52 | where 53 | lp = do 54 | c <- asks fspCh 55 | msg <- liftIO . atomically $ readTChan c 56 | case msg of 57 | CheckPiece n v -> do 58 | pmap <- gets pieceMap 59 | let p = pmap ! n 60 | r <- gets fileHandles >>= (liftIO . FS.checkPiece p) 61 | liftIO . 
atomically $ putTMVar v (Just r) 62 | ReadBlock n blk v -> do 63 | debugP $ "Reading block #" ++ show n 64 | ++ "(" ++ show (blockOffset blk) ++ ", " ++ show (blockSize blk) ++ ")" 65 | -- TODO: Protection, either here or in the Peer code 66 | h <- gets fileHandles 67 | bs <- gets pieceMap >>= (liftIO . FS.readBlock n blk h) 68 | liftIO . atomically $ putTMVar v bs 69 | WriteBlock pn blk bs -> {-# SCC "FS_WriteBlock" #-} do 70 | -- TODO: Protection, either here or in the Peer code 71 | fh <- gets fileHandles 72 | pmap <- gets pieceMap 73 | liftIO $ FS.writeBlock fh pn blk pmap bs 74 | lp 75 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | The TODO list 2 | ============= 3 | 4 | (This is a Markdown file) 5 | 6 | The list of things that needs done. Feel free to take anything on the 7 | list if you want, but do coordinate so we don't do multiple work on 8 | the same thing. Feel free to add anything to the list as well. It 9 | partially acts like a bug tracker at the moment in addition to being a 10 | wish-list. 11 | 12 | - Handle error cases when checking a torrent file. 13 | - Do not connect to ourselves :) 14 | - (thomaschrstnsn) Implement a creator for torrent files 15 | - If we get a wrong URI, the code currently deadlocks since the tracker 16 | dies. Handle this problem gracefully. 17 | - When we grab pieces from the Piece Manager, let it provide us with a 18 | pruned set of pieces we can ask with later. This way, we only need to 19 | consider pieces we already have once and we get a faster system. 20 | When doing this, only prune pieces which are done and checked. 21 | - Consider letting the supervisors support monitoring of processes. Use 22 | this to reimplement parts of the PeerMgr code. 23 | - Improve synchronization when the supervisor tree is closing down. 24 | Currently the problem is that the supervisor tree will close down by 25 | asynchronous messages, so the sync on stopping tree will not wait until 26 | the subtree is done. This has another quite dangerous implication: 27 | Stray indefinite blocks on mvars when closing down. 28 | The fix is to build more structure into the closing of the supervisor 29 | tree and make it properly synchronous. 30 | - Cut down communication from the Receiver to the Peer Control process: 31 | When the Receiver Process runs, it should try to drain its socket as 32 | much as possible before trying to communicate with the peer. It should 33 | also try to drain the socket again while waiting on the Control 34 | process. Doing this will lower the contended point of communication in 35 | the system. 36 | - Should the PieceManager know about the InfoHash? We could run a version 37 | without this knowledge. 38 | 39 | Items for later (no particular order) 40 | ------------------------------------- 41 | 42 | - Reduce CPU load. Alternative representations of various data structures 43 | are needed. 44 | - Add prioritization support of multiTorrents 45 | - Design, build and improve a graphic UI. 46 | - Design, build and improve a protocol for communicating with the client. 47 | - Azureus/Vuze has a keepalive flood detector built-in. Consider if this 48 | is relevant for this client. 49 | - Process monitoring in general. Think. 50 | - Write a fuzzing framework for bittorrent. 
51 | 52 | # vim: filetype=none tw=76 expandtab 53 | -------------------------------------------------------------------------------- /src/Data/PendingSet.hs: -------------------------------------------------------------------------------- 1 | module Data.PendingSet 2 | ( PendingSet 3 | , Data.PendingSet.empty 4 | , Data.PendingSet.size 5 | , remove 6 | , have 7 | , unhave 8 | , haves 9 | , unhaves 10 | , pick 11 | ) 12 | where 13 | 14 | import Data.PSQueue hiding (foldl) 15 | 16 | import Torrent 17 | 18 | -- | Representation of Pending Sets. 19 | newtype PendingSet = PendingSet { unPS :: PSQ PieceNum Int } 20 | 21 | -- | The empty pending set. 22 | empty :: PendingSet 23 | empty = PendingSet Data.PSQueue.empty 24 | 25 | size :: PendingSet -> Int 26 | size = Data.PSQueue.size . unPS 27 | 28 | -- | A peer has a given piece. Reflect this in the PendingSet. 29 | have :: PieceNum -> PendingSet -> PendingSet 30 | have pn = PendingSet . alter f pn . unPS 31 | where f Nothing = Just 1 32 | f (Just x) = Just (x + 1) 33 | 34 | -- | A Peer does not have a given piece anymore (TODO: Not used in practice) 35 | unhave :: PieceNum -> PendingSet -> PendingSet 36 | unhave pn = PendingSet . alter f pn . unPS 37 | where f Nothing = Nothing 38 | f (Just 1) = Nothing 39 | f (Just x) = Just (x-1) 40 | 41 | -- | Remove a piece from the histogram. Used when it completes 42 | remove :: PieceNum -> PendingSet -> PendingSet 43 | remove pn = PendingSet . delete pn . unPS 44 | 45 | -- | Add all pieces in a bitfield 46 | haves :: [PieceNum] -> PendingSet -> PendingSet 47 | haves pns = flip (foldl f) pns 48 | where f e = flip have e 49 | 50 | -- | Remove all pieces in a bitfield 51 | unhaves :: [PieceNum] -> PendingSet -> PendingSet 52 | unhaves pns = flip (foldl f) pns 53 | where f e = flip unhave e 54 | 55 | -- | Crawl through the set of pending pieces in decreasing order of rarity. 56 | -- Each piece is discriminated by a selector function until the first hit is 57 | -- found. Then all Pieces of the same priority accepted by the selector is 58 | -- chosen for return. 59 | pick :: (PieceNum -> IO Bool) -> PendingSet -> IO (Maybe [PieceNum]) 60 | pick selector ps = findPri (minView . unPS $ ps) 61 | where findPri Nothing = return Nothing 62 | findPri (Just (pn :-> p, rest)) = do 63 | r <- selector pn 64 | if r 65 | then pickAtPri numToPick p [pn] (minView rest) 66 | else findPri $ minView rest 67 | pickAtPri 0 _p acc _ = return $ Just acc 68 | pickAtPri _ _p acc Nothing = return $ Just acc 69 | pickAtPri k p acc (Just (pn :-> p', rest)) 70 | | p == p' = do 71 | r <- selector pn 72 | if r 73 | then pickAtPri (k-1) p (pn : acc) $ minView rest 74 | else pickAtPri k p acc $ minView rest 75 | | otherwise = return $ Just acc 76 | 77 | -- | Number of pieces to pick with the picker. Setting an upper limit here because if a lot 78 | -- of peers have all pieces, these numbers grow insanely big, leading to allocation we 79 | -- don't really need. 80 | numToPick :: Int 81 | numToPick = 7 82 | -------------------------------------------------------------------------------- /src/Process/Console.hs: -------------------------------------------------------------------------------- 1 | -- | The Console process has two main purposes. It is a telnet-like 2 | -- interface with the user and it is our first simple logging device 3 | -- for what happens inside the system. 
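-- The commands read from stdin are "help" (print a help message), "show" (ask
-- the Status process for all torrents and print the answer), and "quit" (fill
-- the supplied TMVar so the rest of the program can shut down); anything else
-- is reported back as an unknown command.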
4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | module Process.Console 6 | ( start 7 | ) 8 | where 9 | 10 | import Control.Concurrent 11 | import Control.Concurrent.STM 12 | import Control.Monad.Reader 13 | 14 | import Prelude 15 | 16 | 17 | import Process 18 | import qualified Process.Status as St 19 | import Supervisor 20 | 21 | 22 | data Cmd = Quit -- ^ Quit the program 23 | | Show -- ^ Show current state 24 | | Help -- ^ Print Help message 25 | | Unknown String -- ^ Unknown command 26 | deriving (Eq, Show) 27 | 28 | type CmdChannel = TChan Cmd 29 | 30 | data CF = CF { cmdCh :: CmdChannel 31 | , wrtCh :: TChan String } 32 | 33 | instance Logging CF where 34 | logName _ = "Process.Console" 35 | 36 | -- | Start the logging process and return a channel to it. Sending on this 37 | -- Channel means writing stuff out on stdOut 38 | start :: TMVar () -> St.StatusChannel -> SupervisorChannel -> IO ThreadId 39 | start waitC statusC supC = do 40 | cmdC <- readerP -- We shouldn't be doing this in the long run 41 | wrtC <- writerP 42 | spawnP (CF cmdC wrtC) () ({-# SCC "Console" #-} 43 | catchP eventLoop (defaultStopHandler supC)) 44 | where 45 | eventLoop = do 46 | c <- asks cmdCh 47 | o <- asks wrtCh 48 | m <- liftIO . atomically $ readTChan c 49 | case m of 50 | Quit -> liftIO . atomically $ putTMVar waitC () 51 | Help -> liftIO . atomically $ writeTChan o helpMessage 52 | (Unknown n) -> liftIO . atomically $ writeTChan o $ "Unknown command: " ++ n 53 | Show -> do 54 | v <- liftIO newEmptyTMVarIO 55 | liftIO . atomically $ writeTChan statusC (St.RequestAllTorrents v) 56 | sts <- liftIO . atomically $ takeTMVar v 57 | liftIO . atomically $ writeTChan o (show sts) 58 | eventLoop 59 | 60 | helpMessage :: String 61 | helpMessage = concat 62 | [ "Command Help:\n" 63 | , "\n" 64 | , " help - Show this help\n" 65 | , " quit - Quit the program\n" 66 | , " show - Show the current downloading status\n" 67 | ] 68 | 69 | writerP :: IO (TChan String) 70 | writerP = do wrt <- newTChanIO 71 | _ <- forkIO $ lp wrt 72 | return wrt 73 | where lp wCh = {-# SCC "writerP" #-} 74 | forever (do m <- atomically $ readTChan wCh 75 | putStrLn m) 76 | 77 | readerP :: IO CmdChannel 78 | readerP = do cmd <- newTChanIO 79 | _ <- forkIO $ lp cmd 80 | return cmd 81 | where lp cmd = {-# SCC "readerP" #-} forever $ 82 | do l <- getLine 83 | atomically $ writeTChan cmd 84 | (case l of 85 | "help" -> Help 86 | "quit" -> Quit 87 | "show" -> Show 88 | c -> Unknown c) 89 | 90 | -------------------------------------------------------------------------------- /src/RateCalc.hs: -------------------------------------------------------------------------------- 1 | -- | Rate calculation. 
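-- A Rate couples two byte counters with a rate measured over a sliding window:
-- the window initially reaches @fudge@ (5 seconds) into the past and is never
-- allowed to stretch back more than @maxRatePeriod@ (20 seconds) from the time
-- of the latest extraction.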
2 | {-# LANGUAGE BangPatterns #-} 3 | module RateCalc ( 4 | -- * Types 5 | Rate 6 | -- * Interface 7 | , new 8 | , update 9 | , extractCount 10 | , extractRate 11 | ) 12 | 13 | where 14 | 15 | import Control.DeepSeq 16 | import Data.Time.Clock 17 | 18 | -- | A Rate is a record of information used for calculating the rate 19 | data Rate = Rate 20 | { rate :: !Double -- ^ The current rate 21 | , bytes :: !Int -- ^ The amount of bytes transferred since last rate extraction 22 | , count :: !Int -- ^ The amount of bytes transferred since last count extraction 23 | , lastExt :: !UTCTime -- ^ When was the last rate update 24 | , rateSince :: !UTCTime -- ^ From where is the rate measured 25 | } 26 | 27 | instance NFData Rate where 28 | rnf (Rate r b c _ _) = 29 | rnf r `seq` rnf b `seq` rnf c 30 | 31 | fudge :: NominalDiffTime 32 | fudge = fromInteger 5 -- Seconds 33 | 34 | maxRatePeriod :: NominalDiffTime 35 | maxRatePeriod = fromInteger 20 -- Seconds 36 | 37 | new :: UTCTime -> Rate 38 | new t = Rate { rate = 0.0 39 | , bytes = 0 40 | , count = 0 41 | , lastExt = addUTCTime (-fudge) t 42 | , rateSince = addUTCTime (-fudge) t 43 | } 44 | 45 | -- | The call @update n rt@ updates the rate structure @rt@ with @n@ new bytes 46 | update :: Int -> Rate -> Rate 47 | update n rt = {-# SCC "update" #-} 48 | rt { bytes = nb, count = nc} 49 | where !nb = bytes rt + n 50 | !nc = count rt + n 51 | 52 | 53 | -- | The call @extractRate t rt@ extracts the current rate from the rate 54 | -- structure and updates the rate structures internal book-keeping 55 | extractRate :: UTCTime -> Rate -> (Double, Rate) 56 | extractRate t rt = {-# SCC "extractRate" #-} 57 | let since = rateSince rt 58 | lext = lastExt rt 59 | n = bytes rt 60 | oldWindow :: Double 61 | oldWindow = {-# SCC "diffUTC1" #-} realToFrac $ diffUTCTime lext since 62 | newWindow :: Double 63 | newWindow = {-# SCC "diffUTS2" #-} realToFrac $ diffUTCTime t since 64 | !r = {-# SCC "r" #-} (rate rt * oldWindow + (fromIntegral n)) / newWindow 65 | !nrt = {-# SCC "rt_creat" #-} 66 | rt { rate = r 67 | , bytes = 0 68 | , lastExt = t 69 | , rateSince = {-# SCC "max" #-} max since (addUTCTime (-maxRatePeriod) t) 70 | } 71 | in 72 | -- Update the rate and book-keep the missing pieces. The total is simply a built-in 73 | -- counter. The point where we expect the next update is pushed at most 5 seconds ahead 74 | -- in time. But it might come earlier if the rate is high. 75 | -- Last is updated with the current time. Finally, we move the windows earliest value 76 | -- forward if it is more than 20 seconds from now. 
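-- A worked example with made-up numbers: if the previous rate was 100 bytes/s
-- over an oldWindow of 10 s, and 3000 bytes arrived so that newWindow is now
-- 12 s, the new rate is r = (100 * 10 + 3000) / 12 ~= 333 bytes/s.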
77 | (r, nrt) 78 | 79 | -- | The call @extractCount rt@ extract the bytes transferred since last extraction 80 | extractCount :: Rate -> (Int, Rate) 81 | extractCount rt = {-# SCC "extractCount" #-} (crt, rt { count = 0 }) 82 | where crt = count rt 83 | 84 | -------------------------------------------------------------------------------- /tools/visualize_stats.R: -------------------------------------------------------------------------------- 1 | #R sparklines 2 | sparkline<-function(ydata=rnorm(100,500,50),width=1.5,height=0.5,sigfigs=4) { 3 | 4 | # ydata = vector of data to be plotted 5 | # width = width of sparlkline in inches, including text 6 | # height = height of sparkline in inches 7 | # sigfigs = number of significant figures to round min, max, and last values to 8 | 9 | temppar<-par # store default graphics parameters 10 | par(mai=c(0.0,0.00,0.0,0.00),fin=c(width,height)) 11 | len<-length(ydata) # determine the length of the data set 12 | ymin<-min(ydata) # determine the minimum 13 | tmin<-which.min(ydata) # and its index 14 | ymax<-max(ydata) # determine the maximum 15 | tmax<-which.max(ydata) # and its index 16 | yfin<-signif(ydata[len],sigfigs) #determine most recent data point 17 | plotrange=c(ymin-0.3*(ymax-ymin),ymax+0.3*(ymax-ymin)) # define plot range to leave enough room for min and max circles and text 18 | plot(x=1:len,y=ydata,type="l",xlim=c(1,len*1.5),ylim=plotrange,col="gray30",lwd=0.5,ann=FALSE,axes=FALSE) # plot sparkline 19 | points(x=c(tmin,tmax),y=c(ymin,ymax),pch=19,col=c("red","blue"),cex=0.5) # plot min and max points 20 | text(x=len,y=ymin,labels=signif(ymin,sigfigs),cex=0.7,pos=4,col="red") # show minimum value 21 | text(x=len,y=ymax,labels=signif(ymax,sigfigs),cex=0.7,pos=4,col="blue") # show maximum value 22 | text(x=len,y=(ymin+ymax)/2,labels=yfin,cex=0.7,pos=4) # show most recent value 23 | par(temppar) # restore graphics defaults 24 | } 25 | 26 | stat <- read.csv("stat_db.csv", header=TRUE, sep="\t") 27 | attach(stat) 28 | productivity <- (MutCPU / (MutCPU+GCCPU)) 29 | cpunetwork <- ((MutCPU + GCCPU) / ((Uploaded + Downloaded + 1) / (1024*1024))) 30 | cpupnetwork <- subset(cpunetwork, Downloaded > (1024 * 1024)) 31 | max_space <- subset(MaxBytesUsed, MaxBytesUsed < (150*1024*1024)) 32 | peak_alloc <- subset(PeakMegabytesAlloc, PeakMegabytesAlloc < 100) 33 | cpupnetwork <- subset(cpupnetwork, cpupnetwork < 1) 34 | 35 | png(filename="MaxBytesUsed.png", width=640, height=78, bg="transparent") 36 | sparkline(max_space / (1024*1024), width=6, height=1) 37 | dev.off() 38 | svg(filename="MaxBytesUsed.svg", width=3, height=0.5, bg="transparent") 39 | sparkline(max_space / (1024*1024), width=3, height=0.5) 40 | dev.off() 41 | 42 | png(filename="PeakMegabytes.png", width=640, height=78, bg="transparent") 43 | sparkline(peak_alloc, width=6, height=1) 44 | dev.off() 45 | svg(filename="PeakMegabytes.svg", width=3, height=0.5, bg="transparent") 46 | sparkline(peak_alloc, width=3, height=0.5) 47 | dev.off() 48 | 49 | png(filename="Productivity.png", width=640, height=78, bg="transparent") 50 | sparkline(productivity, width=6, height=1) 51 | dev.off() 52 | svg(filename="Productivity.svg", width=3, height=0.5, bg="transparent") 53 | sparkline(productivity, width=3, height=0.5) 54 | dev.off() 55 | 56 | png(filename="CPUNet.png", width=640, height=78, bg="transparent") 57 | sparkline(cpupnetwork, width=6, height=1) 58 | dev.off() 59 | svg(filename="CPUNet.svg", width=3, height=0.5, bg="transparent") 60 | sparkline(cpupnetwork, width=3, height=0.5) 61 | dev.off() 62 | 
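# Note: this script expects stat_db.csv in the working directory. The tools
# Makefile produces it with "./postproc.hs present stat_db.txt > stat_db.csv"
# and then renders the plots with "R --no-save < visualize_stats.R".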
-------------------------------------------------------------------------------- /Combinatorrent.cabal: -------------------------------------------------------------------------------- 1 | name: Combinatorrent 2 | category: Network 3 | version: 0.3.2 4 | 5 | description: Combinatorrent provides a BitTorrent client, based on STM 6 | for concurrency. This is an early preview release which is capable of 7 | downloading files from various torrent trackers, but has not yet been 8 | demonstrated to be correct in all aspects. 9 | 10 | It is expected that the package currently contains numerous and even 11 | grave bugs. Patches to fix any problem are welcome! 12 | cabal-version: >= 1.6 13 | 14 | license: BSD3 15 | license-file: LICENSE 16 | copyright: (c) 2009,2010 Jesper Louis Andersen 17 | author: Jesper Louis Andersen 18 | maintainer: jesper.louis.andersen@gmail.com 19 | stability: experimental 20 | synopsis: A concurrent bittorrent client 21 | tested-with: GHC ==6.12.1, GHC ==6.12.2, GHC ==6.13.20100426, GHC == 7.4.1 22 | build-type: Configure 23 | 24 | extra-tmp-files: src/Version.hs 25 | extra-source-files: src/Version.hs.in, configure 26 | data-files: AUTHORS, README.md 27 | 28 | flag debug 29 | description: Enable debug support 30 | default: True 31 | 32 | flag threaded 33 | description: Build with threaded runtime 34 | default: False 35 | 36 | flag threadscope 37 | description: Enable the eventlog necessary for ThreadScope 38 | default: False 39 | 40 | executable Combinatorrent 41 | hs-source-dirs: src 42 | main-is: Combinatorrent.hs 43 | other-modules: Protocol.BCode, Protocol.Wire, 44 | Data.Queue, Data.PieceSet, Data.PendingSet 45 | Process.ChokeMgr, Process.Console, Process.FS, Process.Listen, 46 | Process.PeerMgr, Process.Peer, Process.PieceMgr, Process.Status, 47 | Process.Timer, Process.Tracker, Process.TorrentManager 48 | Digest, FS, Channels, Process, RateCalc, 49 | Supervisor, Torrent, Test, TestInstance, Process.DirWatcher, 50 | Tracer, 51 | Process.Peer.Sender, 52 | Process.Peer.SenderQ, 53 | Process.Peer.Receiver 54 | 55 | build-depends: 56 | array >= 0.3, 57 | attoparsec >= 0.8, 58 | base >= 3.0, 59 | base < 5.0, 60 | bytestring, 61 | cereal >= 0.3 && < 0.6, 62 | containers, 63 | deepseq, 64 | directory, 65 | filepath, 66 | cryptohash, 67 | hslogger, 68 | HTTP, 69 | HUnit, 70 | mtl, 71 | network, 72 | network-uri, 73 | pretty, 74 | PSQueue, 75 | QuickCheck >= 2.4 && < 2.9, 76 | random, 77 | random-shuffle, 78 | stm, 79 | test-framework, 80 | test-framework-hunit, 81 | test-framework-quickcheck2, 82 | text, 83 | time 84 | 85 | extensions: CPP 86 | ghc-options: -Wall -fwarn-tabs -fno-warn-orphans -funbox-strict-fields -threaded -O2 87 | 88 | if impl(ghc >= 6.13.0) 89 | ghc-options: -rtsopts 90 | 91 | if !flag(debug) 92 | cpp-options: "-DNDEBUG" 93 | 94 | if flag(threadscope) 95 | ghc-options: -eventlog 96 | 97 | source-repository head 98 | type: git 99 | location: git://github.com/jlouis/combinatorrent.git 100 | branch: master 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /src/Process/Peer/Receiver.hs: -------------------------------------------------------------------------------- 1 | module Process.Peer.Receiver 2 | ( start ) 3 | where 4 | 5 | import Control.Concurrent 6 | import Control.Concurrent.STM 7 | import Control.Exception (assert) 8 | 9 | import Control.Monad.Reader 10 | import Control.Monad.State 11 | 12 | import qualified Data.ByteString as B 13 | import Prelude hiding (log) 14 | 15 | import 
Data.Serialize.Get 16 | 17 | import Network.Socket hiding (send, sendTo, recv, recvFrom) 18 | import Network.Socket.ByteString 19 | 20 | import Channels 21 | import Process 22 | import Supervisor 23 | import Protocol.Wire 24 | 25 | 26 | data CF = CF { rpMsgCh :: TChan MsgTy } 27 | 28 | instance Logging CF where 29 | logName _ = "Process.Peer.Receiver" 30 | 31 | demandInput :: Int -> Process CF Socket B.ByteString 32 | demandInput l = {-# SCC "demandInput" #-} do 33 | s <- get 34 | bs <- liftIO $ recv s (fromIntegral l) 35 | when (B.null bs) stopP 36 | return bs 37 | 38 | start :: Socket -> TChan MsgTy -> SupervisorChannel -> IO ThreadId 39 | start s ch supC = do 40 | spawnP (CF ch) s 41 | ({-# SCC "Receiver" #-} catchP readSend 42 | (defaultStopHandler supC)) 43 | 44 | readSend :: Process CF Socket () 45 | readSend = do 46 | bs <- demandInput 2048 47 | loopHeader bs 48 | 49 | 50 | loopHeader :: B.ByteString -> Process CF Socket () 51 | loopHeader bs = {-# SCC "loopHeader" #-} 52 | let bsl = B.length bs 53 | in if bsl >= 4 54 | then let (l, r) = B.splitAt 4 bs 55 | ll = readW32 l 56 | in if ll == 0 57 | then loopHeader r -- KeepAlive 58 | else loopMsg [r] (fromIntegral (B.length r)) (readW32 l) 59 | else do 60 | inp <- demandInput 2048 61 | loopHeader (B.concat [bs, inp]) -- We bet on this get called rarely 62 | 63 | loopMsg :: [B.ByteString] -> Int -> Int -> Process CF Socket () 64 | loopMsg lbs sz l = {-# SCC "loopMsg" #-} do 65 | if sz >= l 66 | then do let (u, r) = 67 | B.splitAt (fromIntegral l) 68 | (case lbs of 69 | [x] -> x 70 | rest -> (B.concat $ reverse rest)) 71 | msg <- assert (B.length u == fromIntegral l) parseMsg l u 72 | c <- asks rpMsgCh 73 | liftIO . atomically $ writeTChan c (FromPeer (msg, fromIntegral l)) 74 | loopHeader r 75 | else do inp <- demandInput 4096 76 | loopMsg (inp : lbs) (sz + fromIntegral (B.length inp)) l 77 | 78 | readW32 :: B.ByteString -> Int 79 | readW32 lbs = {-# SCC "readW32" #-} 80 | let [b1,b2,b3,b4] = B.unpack lbs 81 | b1' = fromIntegral b1 82 | b2' = fromIntegral b2 83 | b3' = fromIntegral b3 84 | b4' = fromIntegral b4 85 | in (b4' + (256 * b3') + (256 * 256 * b2') + (256 * 256 * 256 * b1')) 86 | 87 | parseMsg :: Int -> B.ByteString -> Process CF Socket Message 88 | parseMsg _l u = {-# SCC "parseMsg" #-} 89 | case runGet decodeMsg u of 90 | Left err -> do warningP $ "Incorrect parse in receiver, context: " ++ show err 91 | stopP 92 | Right msg -> return msg 93 | -------------------------------------------------------------------------------- /src/Process.hs: -------------------------------------------------------------------------------- 1 | -- | Core Process code 2 | {-# LANGUAGE ExistentialQuantification, FlexibleInstances, 3 | GeneralizedNewtypeDeriving, 4 | ScopedTypeVariables, 5 | DeriveDataTypeable, 6 | MultiParamTypeClasses, CPP #-} 7 | 8 | module Process ( 9 | -- * Types 10 | Process 11 | -- * Interface 12 | , runP 13 | , spawnP 14 | , catchP 15 | , cleanupP 16 | , stopP 17 | -- * Log Interface 18 | , Logging(..) 19 | , logP 20 | , infoP 21 | , debugP 22 | , warningP 23 | , criticalP 24 | , errorP 25 | ) 26 | where 27 | 28 | import Control.Applicative 29 | import Control.Concurrent 30 | import Control.Exception 31 | 32 | import Control.Monad.Reader 33 | import Control.Monad.State.Strict 34 | 35 | import Data.Typeable 36 | 37 | import Prelude hiding (log) 38 | 39 | import System.Log.Logger 40 | 41 | -- | A @Process a b c@ is the type of processes with access to configuration data @a@, state @b@ 42 | -- returning values of type @c@. 
Usually, the read-only data are configuration parameters and 43 | -- channels, and the state the internal process state. It is implemented by means of a transformer 44 | -- stack on top of IO. 45 | newtype Process a b c = Process (ReaderT a (StateT b IO) c) 46 | deriving (Functor, Applicative, Monad, MonadIO, MonadState b, MonadReader a) 47 | 48 | data StopException = StopException 49 | deriving (Show, Typeable) 50 | 51 | instance Exception StopException 52 | 53 | stopP :: Process a b c 54 | stopP = throw StopException 55 | 56 | -- | Run the process monad given a configuation of type @a@ and a initial state of type @b@ 57 | runP :: a -> b -> Process a b c -> IO (c, b) 58 | runP c st (Process p) = runStateT (runReaderT p c) st 59 | 60 | -- | Spawn and run a process monad 61 | spawnP :: a -> b -> Process a b () -> IO ThreadId 62 | spawnP c st p = forkIO proc 63 | where proc = runP c st p >> return () 64 | 65 | -- | Run the process monad for its side effect, with a stopHandler if exceptions 66 | -- are raised in the process 67 | catchP :: Logging a => Process a b () -> Process a b () -> Process a b () 68 | catchP proc stopH = cleanupP proc stopH (return ()) 69 | 70 | -- | Run the process monad for its side effect. @cleanupP p sh ch@ describes to 71 | -- run @p@. If @p@ dies by a kill from a supervisor, run @ch@. Otherwise it runs 72 | -- @ch >> sh@ on death. 73 | cleanupP :: Logging a => Process a b () -> Process a b () -> Process a b () -> Process a b () 74 | cleanupP proc stopH cleanupH = do 75 | st <- get 76 | c <- ask 77 | (a, s') <- liftIO $ runP c st proc `catches` 78 | [ Handler (\ThreadKilled -> 79 | runP c st ( do infoP $ "Process Terminated by Supervisor" 80 | cleanupH )) 81 | , Handler (\StopException -> 82 | runP c st (do infoP $ "Process Terminating gracefully" 83 | cleanupH >> stopH)) -- This one is ok 84 | , Handler (\(ex :: SomeException) -> 85 | runP c st (do criticalP $ "Process exiting due to ex: " ++ show ex 86 | cleanupH >> stopH)) 87 | ] 88 | put s' 89 | return a 90 | 91 | ------ LOGGING 92 | 93 | -- 94 | -- | The class of types where we have a logger inside them somewhere 95 | class Logging a where 96 | -- | Returns a channel for logging and an Identifying string to use 97 | logName :: a -> String 98 | 99 | logP :: Logging a => Priority -> String -> Process a b () 100 | logP prio msg = do 101 | n <- asks logName 102 | liftIO $ logM n prio (n ++ ":\t" ++ msg) 103 | 104 | infoP, debugP, criticalP, warningP, errorP :: Logging a => String -> Process a b () 105 | infoP = logP INFO 106 | #ifdef NDEBUG 107 | debugP _ = return () 108 | #else 109 | debugP = logP DEBUG 110 | #endif 111 | criticalP = logP CRITICAL 112 | warningP = logP WARNING 113 | errorP = logP ERROR 114 | 115 | -------------------------------------------------------------------------------- /doc/Peer.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | node [fontname="URW Gothic L",fontsize=10]; 3 | edge [fontname="URW Gothic L",fontsize=8]; 4 | 5 | labeljust = l; 6 | labelloc = t; 7 | 8 | fontsize = 24; 9 | fontname="URW Gothic L"; 10 | label = "Peer Process FlowChart"; 11 | 12 | 13 | // External communication 14 | { rank=same; 15 | inbound [label="Inbound", shape=ellipse,color=deepskyblue4]; 16 | loop [label="Loop/Sync"]; 17 | } 18 | 19 | outbound [label="Outbound", shape=ellipse,color=deepskyblue4]; 20 | peerMgr [label="Peer Manager",shape=ellipse,color=deepskyblue4]; 21 | pieceMgr [label="Piece Mgr",shape=ellipse,color=deepskyblue4]; 22 | 23 | exit 
[label="Exit",color=red]; 24 | 25 | node [shape=box]; 26 | 27 | // Loop 28 | caseMsg [label="Case msg"]; 29 | casePMMsg [label="Case msg"]; 30 | 31 | inbound -> caseMsg [style=dotted,color=deepskyblue4]; 32 | loop -> caseMsg; 33 | peerMgr -> casePMMsg [style=dotted,color=deepskyblue4]; 34 | loop -> casePMMsg; 35 | 36 | { rank=same; 37 | piece [label="PIECE"]; 38 | have [label="HAVE"]; 39 | cancel [label="CANCEL"]; 40 | unchoke [label="UNCHOKE"]; 41 | choke [label="CHOKE"]; 42 | request [label="REQUEST"]; 43 | bitfield [label="BITFIELD"]; 44 | ochoke [label="OCHOKE"]; 45 | ounchoke [label="OUNCHOKE"]; 46 | } 47 | 48 | // OUnchoke 49 | 50 | casePMMsg -> ounchoke; 51 | ounchoke -> outbound [style=dotted,color=deepskyblue4]; 52 | ounchoke -> loop; 53 | 54 | // OChoke 55 | chokePrune [label="Prune Queue of\nPIECE messages",style=dotted,shape=box,color=deepskyblue4]; 56 | 57 | casePMMsg -> ochoke; 58 | ochoke -> chokePrune [style=dotted,color=deepskyblue4,arrowhead=none]; 59 | chokePrune -> outbound [style=dotted,color=deepskyblue4]; 60 | ochoke -> loop; 61 | 62 | // Bitfield 63 | constructPieceSet [label="Construct Piece\nSet"]; 64 | 65 | caseMsg -> bitfield; 66 | bitfield -> constructPieceSet; 67 | constructPieceSet -> exit [label="Not\nInit"]; 68 | constructPieceSet -> sizeCheck; 69 | 70 | // Piece 71 | pieceOk [label="Ok?"]; 72 | pieceStore [label="Store block;\nUpdate Queue"]; 73 | 74 | piece -> pieceOk; 75 | pieceOk -> loop [label="No"]; 76 | pieceOk -> pieceStore [label="Yes"]; 77 | 78 | pieceStore -> sizeCheck; 79 | pieceStore -> pieceMgr [style=dotted,color=deepskyblue4]; 80 | 81 | caseMsg -> piece; 82 | 83 | // Have 84 | haveOk [label="Ok?"]; 85 | 86 | caseMsg -> have; 87 | have -> haveOk; 88 | haveOk -> exit [label="No"]; 89 | haveOk -> sizeCheck [label="Yes"]; 90 | 91 | // Cancel 92 | cancelForward [label="Forward"]; 93 | 94 | caseMsg -> cancel; 95 | cancel -> cancelForward; 96 | cancelForward -> loop; 97 | cancelForward -> outbound [style="dotted",color=deepskyblue4]; 98 | 99 | // Unchoke 100 | caseMsg -> unchoke; 101 | unchoke -> sizeCheck; 102 | 103 | // Choke 104 | clearPieceQueue [label="Clear\nPiece Queue"]; 105 | 106 | caseMsg -> choke; 107 | choke -> clearPieceQueue; 108 | clearPieceQueue -> pieceMgr [style=dotted,color=deepskyblue4]; 109 | clearPieceQueue -> loop; 110 | 111 | // Request 112 | requestChoked [label="Choked?",shape=diamond]; 113 | requestIgnore [label="Ignore request"]; 114 | queueRequest [label="Queue request"]; 115 | 116 | caseMsg -> request; 117 | request -> requestChoked; 118 | requestChoked -> requestIgnore [label="Yes"]; 119 | requestIgnore -> loop; 120 | 121 | requestChoked -> queueRequest [label="No"]; 122 | 123 | queueRequest -> loop; 124 | queueRequest -> outbound [style=dotted,color=deepskyblue4]; 125 | 126 | 127 | 128 | subgraph cluster_fill_blocks { 129 | fontsize = 16; 130 | style = "dotted"; 131 | labelloc=b 132 | label = "Unchoke checked fill blocks"; 133 | 134 | sizeCheck [label="Sz < Lo watermark"]; 135 | grabBlocks [label="Grab up to\nHi-Sz blocks"]; 136 | queueBlocks [label="Queue new blocks"]; 137 | 138 | sizeCheck -> loop [label="No"]; 139 | sizeCheck -> grabBlocks [label="Yes"]; 140 | 141 | grabBlocks -> pieceMgr [style=dotted,color=deepskyblue4]; 142 | pieceMgr -> grabBlocks [style=dotted,color=deepskyblue4]; 143 | 144 | grabBlocks -> queueBlocks; 145 | queueBlocks -> loop; 146 | } 147 | } -------------------------------------------------------------------------------- /src/Data/Queue.hs: 
-------------------------------------------------------------------------------- 1 | -- | Simple Functional queues based on a double list. This usually achieves good amortized bounds 2 | module Data.Queue ( 3 | -- * Types 4 | Queue 5 | -- * Functions 6 | , empty 7 | , null 8 | , first 9 | , remove 10 | , push 11 | , pop 12 | , Data.Queue.partition 13 | , Data.Queue.filter 14 | -- * Test Suite 15 | , testSuite 16 | ) 17 | where 18 | 19 | import Data.List as Lst hiding (null) 20 | import Data.Maybe (fromJust) 21 | 22 | import Prelude hiding (null) 23 | 24 | import Test.QuickCheck 25 | import Test.Framework 26 | import Test.Framework.Providers.QuickCheck2 27 | import Test.Framework.Providers.HUnit 28 | import Test.HUnit hiding (Path, Test) 29 | 30 | data Queue a = Queue [a] [a] 31 | deriving (Eq, Show) 32 | 33 | -- | The definition of an empty Queue 34 | empty :: Queue a 35 | empty = Queue [] [] 36 | 37 | -- | Returns True on an empty Queue, and False otherwise. 38 | null :: Queue a -> Bool 39 | null (Queue [] []) = True 40 | null _ = False 41 | 42 | -- | Pushes a new element to the tail of the list. 43 | -- Operates in constant time. 44 | push :: a -> Queue a -> Queue a 45 | push e (Queue front back) = Queue front (e : back) 46 | 47 | 48 | -- | Pops the top most element off the queue. 49 | -- Operates in amortized constant time 50 | pop :: Queue a -> Maybe (a, Queue a) 51 | pop (Queue [] []) = Nothing 52 | pop (Queue (e : es) back) = Just (e, Queue es back) 53 | pop (Queue [] back) = pop (Queue (reverse back) []) 54 | 55 | -- | Return the head of the queue, if any 56 | first :: Queue a -> Maybe a 57 | first (Queue [] []) = Nothing 58 | first (Queue (e : _) _) = Just e 59 | first (Queue [] back) = Just $ last back -- Yeah slow 60 | 61 | -- | Kill the first element in the queue 62 | remove :: Queue a -> Queue a 63 | remove (Queue [] []) = Queue [] [] 64 | remove (Queue (_ : es) b) = Queue es b 65 | remove (Queue [] b) = remove (Queue (reverse b) []) 66 | 67 | -- | Generates a new Queue only containing elements for which 68 | -- p returns true. 69 | filter :: (a -> Bool) -> Queue a -> Queue a 70 | filter p (Queue front back) = Queue (Lst.filter p front) (Lst.filter p back) 71 | 72 | -- | Find elements matching an predicate and lift them out of the queue 73 | partition :: (a -> Bool) -> Queue a -> ([a], Queue a) 74 | partition p (Queue front back) = 75 | let (ft, fl) = Lst.partition p front 76 | (bt, bl) = Lst.partition p back 77 | in (ft ++ bt, Queue fl bl) 78 | 79 | --------------------------------------------------------------------- 80 | -- Tests 81 | 82 | testSuite :: Test 83 | testSuite = testGroup "Data/Queue" 84 | [ testCase "Empty Queue is Empty" testEmptyIsEmpty 85 | , testCase "First/Remove" testFirstRemove 86 | , testProperty "Simple push/pop" testPushPopSimple 87 | , testProperty "push/pop more" testPushPopMore 88 | , testProperty "push/pop interleave" testPushPopInterleave 89 | ] 90 | 91 | -- Rudimentary boring simple tests 92 | testEmptyIsEmpty :: Assertion 93 | testEmptyIsEmpty = do 94 | assertBool "for Empty Q" (null empty) 95 | assertBool "for non-Empty Q" (not $ null (push "Foo" empty)) 96 | assertEqual "for popping the Empty Q" (pop $ snd . fromJust . 
pop $ push "Foo" empty) Nothing 97 | 98 | testFirstRemove :: Assertion 99 | testFirstRemove = do 100 | -- Should really cover this more 101 | let nq = push 2 (push (1 :: Integer) empty) 102 | assertEqual "first" (first nq) (Just 1) 103 | assertEqual "first/removed" (first (remove nq)) (Just 2) 104 | assertEqual "emptied" (first (remove (remove nq))) Nothing 105 | 106 | testPushPopSimple :: String -> Bool 107 | testPushPopSimple s = 108 | let nq = pop (push s empty) 109 | in case nq of 110 | Nothing -> False 111 | Just (r, q) -> r == s && null q 112 | 113 | testPushPopMore :: [String] -> Bool 114 | testPushPopMore ls = 115 | let nq = foldl (flip push) empty ls 116 | popAll = unfoldr pop nq 117 | in popAll == ls 118 | 119 | data Operation = Push | Pop 120 | deriving (Eq, Show) 121 | 122 | instance Arbitrary Operation where 123 | arbitrary = oneof [return Push, return Pop] 124 | 125 | testPushPopInterleave :: [Operation] -> [String] -> Bool 126 | testPushPopInterleave ops ls = testQ empty ops ls [] 127 | where 128 | testQ q op lst res = 129 | case op of 130 | [] -> popAll q == reverse res 131 | Pop : r -> case pop q of 132 | Nothing -> testQ empty r lst [] 133 | Just (e, nq) -> 134 | if (last res) == e 135 | then testQ nq r lst (init res) 136 | else False 137 | Push : r -> case lst of 138 | [] -> testQ q r lst res 139 | (e : es) -> testQ (push e q) r es (e : res) 140 | popAll = unfoldr pop 141 | -------------------------------------------------------------------------------- /src/Process/TorrentManager.hs: -------------------------------------------------------------------------------- 1 | -- | The Manager Process - Manages the torrents and controls them 2 | module Process.TorrentManager ( 3 | -- * Types 4 | TorrentManagerMsg(..) 5 | -- * Channels 6 | , TorrentMgrChan 7 | -- * Interface 8 | , start 9 | ) 10 | where 11 | 12 | import Control.Concurrent 13 | import Control.Concurrent.STM 14 | 15 | import Control.Monad.State 16 | import Control.Monad.Reader 17 | 18 | import qualified Data.ByteString as B 19 | import Prelude hiding (log) 20 | 21 | import Protocol.BCode as BCode 22 | import Process 23 | import qualified Process.Status as Status 24 | import qualified Process.PeerMgr as PeerMgr 25 | import qualified Process.FS as FSP 26 | import qualified Process.PieceMgr as PieceMgr (start, createPieceDb) 27 | import qualified Process.ChokeMgr as ChokeMgr (ChokeMgrChannel) 28 | import qualified Process.Tracker as Tracker 29 | import Channels 30 | import FS 31 | import Supervisor 32 | import Torrent 33 | 34 | data TorrentManagerMsg = AddedTorrent FilePath 35 | | RemovedTorrent FilePath 36 | deriving (Eq, Show) 37 | 38 | type TorrentMgrChan = TChan [TorrentManagerMsg] 39 | 40 | data CF = CF { tCh :: TorrentMgrChan 41 | , tStatusCh :: Status.StatusChannel 42 | , tStatusTV :: TVar [Status.PStat] 43 | , tPeerId :: PeerId 44 | , tPeerMgrCh :: PeerMgr.PeerMgrChannel 45 | , tChokeCh :: ChokeMgr.ChokeMgrChannel 46 | } 47 | 48 | instance Logging CF where 49 | logName _ = "Process.TorrentManager" 50 | 51 | data ST = ST { workQueue :: [TorrentManagerMsg] } 52 | start :: TorrentMgrChan -- ^ Channel to watch for changes to torrents 53 | -> Status.StatusChannel 54 | -> TVar [Status.PStat] 55 | -> ChokeMgr.ChokeMgrChannel 56 | -> PeerId 57 | -> PeerMgr.PeerMgrChannel 58 | -> SupervisorChannel 59 | -> IO ThreadId 60 | start chan statusC stv chokeC pid peerC supC = 61 | spawnP (CF chan statusC stv pid peerC chokeC) (ST []) 62 | ({-# SCC "TorrentManager" #-} catchP pgm (defaultStopHandler supC)) 63 | where pgm = 
startStop >> dirMsg >> pgm 64 | dirMsg = do 65 | c <- asks tCh 66 | ls <- liftIO . atomically $ readTChan c 67 | modify (\s -> s { workQueue = ls ++ workQueue s }) 68 | startStop = do 69 | q <- gets workQueue 70 | case q of 71 | [] -> return () 72 | (AddedTorrent fp : rest) -> do 73 | debugP $ "Adding torrent file: " ++ fp 74 | _ <- startTorrent fp 75 | modify (\s -> s { workQueue = rest }) 76 | startStop 77 | (RemovedTorrent _ : _) -> do 78 | errorP "Removal of torrents not yet supported :P" 79 | stopP 80 | 81 | readTorrent :: FilePath -> Process CF ST BCode 82 | readTorrent fp = do 83 | torrent <- liftIO $ B.readFile fp 84 | let bcoded = BCode.decode torrent 85 | case bcoded of 86 | Left err -> do liftIO $ print err 87 | stopP 88 | Right bc -> return bc 89 | 90 | startTorrent :: FilePath -> Process CF ST (Maybe ThreadId) 91 | startTorrent fp = do 92 | bc <- readTorrent fp 93 | ti <- liftIO $ mkTorrentInfo bc 94 | sts <- do v <- liftIO newEmptyTMVarIO 95 | statusC <- asks tStatusCh 96 | liftIO . atomically $ writeTChan statusC (Status.RequestAllTorrents v) 97 | liftIO . atomically $ takeTMVar v 98 | case lookup (infoHash ti) sts of 99 | Nothing -> Just `fmap` startTorrent' fp bc ti 100 | Just _x -> return Nothing 101 | 102 | startTorrent' :: [Char] -> BCode -> TorrentInfo -> Process CF ST ThreadId 103 | startTorrent' fp bc ti = do 104 | fspC <- liftIO newTChanIO 105 | trackerC <- liftIO newTChanIO 106 | supC <- liftIO newTChanIO 107 | pieceMgrC <- liftIO newTChanIO 108 | chokeC <- asks tChokeCh 109 | statusC <- asks tStatusCh 110 | stv <- asks tStatusTV 111 | pid <- asks tPeerId 112 | pmC <- asks tPeerMgrCh 113 | (handles, haveMap, pieceMap) <- liftIO $ openAndCheckFile bc 114 | let left = bytesLeft haveMap pieceMap 115 | pieceDb <- PieceMgr.createPieceDb haveMap pieceMap 116 | (tid, _) <- liftIO $ allForOne ("TorrentSup - " ++ fp) 117 | [ Worker $ FSP.start handles pieceMap fspC 118 | , Worker $ PieceMgr.start pieceMgrC fspC chokeC statusC pieceDb (infoHash ti) 119 | , Worker $ Tracker.start (infoHash ti) ti pid defaultPort statusC trackerC pmC 120 | ] supC 121 | liftIO . atomically $ writeTChan statusC $ Status.InsertTorrent (infoHash ti) left trackerC 122 | c <- asks tPeerMgrCh 123 | liftIO . atomically $ writeTChan c $ PeerMgr.NewTorrent (infoHash ti) 124 | (PeerMgr.TorrentLocal pieceMgrC fspC stv pieceMap) 125 | liftIO . atomically $ writeTChan trackerC Start 126 | return tid 127 | -------------------------------------------------------------------------------- /src/Torrent.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, DeriveGeneric, FlexibleContexts #-} 2 | 3 | -- | The following module is responsible for general types used 4 | -- throughout the system. 5 | module Torrent ( 6 | -- * Types 7 | InfoHash 8 | , PeerId 9 | , AnnounceURL 10 | , TorrentState(..) 11 | , TorrentInfo(..) 12 | , PieceNum 13 | , PieceSize 14 | , PieceMap 15 | , PiecesDoneMap 16 | , PieceInfo(..) 17 | , BlockSize 18 | , Block(..) 19 | , Capabilities(..) 
20 | -- * Interface 21 | , bytesLeft 22 | , defaultBlockSize 23 | , defaultOptimisticSlots 24 | , defaultPort 25 | , mkPeerId 26 | , mkTorrentInfo 27 | ) 28 | where 29 | 30 | #if __GLASGOW_HASKELL__ <= 708 31 | import AdaptGhcVersion 32 | #endif 33 | 34 | import Control.DeepSeq 35 | 36 | import Data.Array 37 | import Data.List 38 | import Data.Maybe (fromMaybe) 39 | import qualified Data.ByteString as B 40 | import qualified Data.Map as M 41 | import Data.Word 42 | import GHC.Generics 43 | import Numeric 44 | 45 | import System.Random 46 | import System.Random.Shuffle 47 | import Test.QuickCheck 48 | 49 | import Protocol.BCode 50 | import Digest 51 | import Version 52 | 53 | -- | The type of Infohashes as used in torrents. These are identifiers 54 | -- of torrents 55 | type InfoHash = Digest 56 | 57 | -- | The peerId is the ID of a client. It is used to identify clients 58 | -- from each other 59 | type PeerId = String 60 | 61 | -- | The internal type of Announce URLs 62 | type AnnounceURL = B.ByteString 63 | 64 | -- | Internal type for a torrent. It identifies a torrent in various places of the system. 65 | data TorrentInfo = TorrentInfo { 66 | infoHash :: InfoHash, 67 | pieceCount :: Int, -- Number of pieces in torrent 68 | announceURLs :: [[AnnounceURL]] 69 | } deriving Show 70 | 71 | data TorrentState = Seeding | Leeching 72 | deriving (Show, Generic) 73 | 74 | instance NFData TorrentState 75 | 76 | ---------------------------------------------------------------------- 77 | -- Capabilities 78 | 79 | data Capabilities = Fast | Extended 80 | deriving (Show, Eq) 81 | 82 | -- PIECES 83 | ---------------------------------------------------------------------- 84 | type PieceNum = Int 85 | type PieceSize = Int 86 | 87 | data PieceInfo = PieceInfo { 88 | offset :: !Integer, -- ^ Offset of the piece, might be greater than Int 89 | len :: !Integer, -- ^ Length of piece; usually a small value 90 | digest :: !B.ByteString -- ^ Digest of piece; taken from the .torret file 91 | } deriving (Eq, Show) 92 | 93 | type PieceMap = Array PieceNum PieceInfo 94 | 95 | -- | The PiecesDoneMap is a map which is true if we have the piece and false otherwise 96 | type PiecesDoneMap = M.Map PieceNum Bool 97 | 98 | -- | Return the amount of bytes left on a torrent given what pieces are done and the 99 | -- map of the shape of the torrent in question. 
100 | bytesLeft :: PiecesDoneMap -> PieceMap -> Integer 101 | bytesLeft done pm = 102 | foldl' (\accu (k,v) -> 103 | case M.lookup k done of 104 | Just False -> (len v) + accu 105 | _ -> accu) 0 $ Data.Array.assocs pm 106 | 107 | -- BLOCKS 108 | ---------------------------------------------------------------------- 109 | type BlockSize = Int 110 | 111 | data Block = Block { blockOffset :: !Int -- ^ offset of this block within the piece 112 | , blockSize :: !BlockSize -- ^ size of this block within the piece 113 | } deriving (Eq, Ord, Show) 114 | 115 | instance NFData Block where 116 | rnf (Block bo sz) = rnf bo `seq` rnf sz `seq` () 117 | 118 | instance Arbitrary Block where 119 | arbitrary = Block <$> pos <*> pos 120 | where pos = choose (0, 4294967296 - 1) 121 | 122 | 123 | defaultBlockSize :: BlockSize 124 | defaultBlockSize = 16384 -- Bytes 125 | 126 | -- | Default number of optimistic slots 127 | defaultOptimisticSlots :: Int 128 | defaultOptimisticSlots = 2 129 | 130 | -- | Default port to communicate on 131 | defaultPort :: Word16 132 | defaultPort = 1579 133 | 134 | -- | Convert a BCode block into its corresponding TorrentInfo block, perhaps 135 | -- failing in the process. 136 | mkTorrentInfo :: BCode -> IO TorrentInfo 137 | mkTorrentInfo bc = do 138 | (ann, np) <- case queryInfo bc of Nothing -> fail "Could not create torrent info" 139 | Just x -> return x 140 | ih <- hashInfoDict bc 141 | let alist = fromMaybe [[ann]] $ announceList bc 142 | -- BEP012 says that lists of URL inside each tier must be shuffled 143 | gen <- newStdGen 144 | let alist' = map (\xs -> shuffle' xs (length xs) gen) alist 145 | return TorrentInfo { infoHash = ih, pieceCount = np, announceURLs = alist'} 146 | where 147 | queryInfo b = 148 | do ann <- announce b 149 | np <- numberPieces b 150 | return (ann, np) 151 | 152 | -- | Create a new PeerId for this client 153 | mkPeerId :: StdGen -> PeerId 154 | mkPeerId gen = header ++ take (20 - length header) ranString 155 | where randomList :: Int -> StdGen -> [Int] 156 | randomList n = take n . unfoldr (Just . random) 157 | rs = randomList 10 gen 158 | ranString = concatMap (\i -> showHex (abs i) "") rs 159 | header = "-CT" ++ protoVersion ++ "-" 160 | -------------------------------------------------------------------------------- /doc/on-hieraches.md: -------------------------------------------------------------------------------- 1 | On Process Hierachies 2 | ===================== 3 | 4 | I have been in thinking mode the last couple of days. It all started 5 | when I decided to look at the problem of *wrong* in 6 | haskell-torrent. What will happen when something fails? Joe Armstrong 7 | has part of the answer by arguing that we must proactively expect a 8 | process to fail and then let some other process clean it up 9 | (paraphrased a lot by me, these are not his exact words). 10 | 11 | Take a look at this image, 12 | 13 | {} 14 | 15 | It is a [hypergraph](http://en.wikipedia.org/wiki/Hypergraph) where 16 | each vertex corresponds to a process and each channel corresponds to 17 | an edge. The graph is undirected because in principle each process can 18 | both send and receive on the channel in question. In practice however, 19 | one can limit the communication as some processes will only transmit 20 | on a channel, and others will only receive. 21 | 22 | To simplify the graph, I omit local RPC channels. It is customary to 23 | implement RPC by creating a local channel and transmitting that 24 | channel over one of the hypergraph edges. 
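Concretely, the idiom looks like this in the code base as it stands (the sketch below uses STM channels, as the rest of the repository does; Req, rpc and serve are invented names, not code from the client -- compare RequestAllTorrents in Process.Status for the real thing):

    import Control.Concurrent.STM

    -- A request carries its own private reply channel.
    data Req = Query (TMVar Int)

    -- Caller: allocate a reply TMVar, ship it over the shared edge, block on it.
    rpc :: TChan Req -> IO Int
    rpc edge = do
        reply <- newEmptyTMVarIO
        atomically $ writeTChan edge (Query reply)
        atomically $ takeTMVar reply

    -- Callee: answer on the private channel it was handed.
    serve :: TChan Req -> IO ()
    serve edge = do
        Query reply <- atomically $ readTChan edge
        atomically $ putTMVar reply 42
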
Response can then work 25 | directly on the local channel, simplifying the problem of giving 26 | *identity* to processes in many cases. 27 | 28 | Termination 29 | ----------- 30 | 31 | A scary part of such a network is what happens when we want to 32 | terminate it or if something goes wrong in one of the processes. In 33 | particular, we have the problem that while Haskell is a pure, nice, 34 | protected Nirvana -- the real world inside IO is a dank, dark place of 35 | twisted little passages (all alike). Thus it might very well be that 36 | we get to experience an exception first-hand in one of the above 37 | processes. 38 | 39 | I have toyed with the idea of copying the excellent CHP library by 40 | Neil Brown. Neil defines a concept of *poison* for channels. A channel 41 | that has been poisoned will always transmit poison. Poison thus 42 | propagates through the hypergraph and slowly but surely kills off each 43 | process. Neil has a description of the process 44 | [on his blog](http://chplib.wordpress.com/2009/09/30/poison-concurrent-termination/). I 45 | like the idea because it uses the existing infrastructure of the 46 | hypergraph. It would be fairly easy to add support for writing poison 47 | handlers at each node. 48 | 49 | Process Hierachies 50 | ------------------ 51 | 52 | I have pondered a different scheme for the last couple of days, 53 | however. We can impose a *location graph* on the hypergraph above: 54 | 55 | {} 56 | 57 | This graph, a tree, describes the location or hierarchy of the 58 | processes in question. The processes named S0, S1, ... and so forth 59 | are *supervisors*. Their responsibility is only one: keep their 60 | children running according to a *policy*. Erlang programmers fluent in 61 | the OTP high level libraries present in the Erlang distribution will 62 | recognize the supervisor term. 63 | 64 | A normal (*worker*) process has two rules: 65 | 66 | - If the process terminates, it must inform its supervisor. 67 | - If the process gets an asynchronous KillThread exception, it 68 | *must* die. 69 | 70 | A supervisor process needs to obey the following scheme: 71 | 72 | - If a process under its supervision dies, it must take action 73 | according to its policy. This may mean that it kills off all 74 | other children and reports back to its supervisor, for 75 | instance. Or it may mean that it simply restarts the process 76 | that got killed. What happens depends on the policy we 77 | configured the supervisor with. 78 | 79 | - If a supervisor is asked to shut down by a KillThread exception, 80 | it must first kill all of its children. 81 | 82 | Note that with these rules, termination is almost free: terminate the 83 | tree. Because everything is linked to the tree, termination will 84 | happen. We will terminate aggressively with asynchronous exceptions. 85 | 86 | An interesting difference from Erlang is that of communication, which 87 | in a certain sense is dual in Haskell+CML: In Erlang, 88 | the process id is what you need to hold in order to transmit an 89 | (async.) message to the process. In our CML-based system the *channel* 90 | is the name you must hold in order to communicate. In other words, 91 | this lets us, in some circumstances, replace the process with a new 92 | one while keeping the channel.
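To make that duality concrete, here is another small sketch (again STM rather than CML, and with made-up names): the channel outlives the thread that reads from it, so a supervisor can kill a worker and fork a replacement onto the very same channel without any sender noticing. The supervisor in src/Supervisor.hs plays a similar trick when it spawns children on a duplicate of its own channel.

    import Control.Concurrent (ThreadId, forkIO, killThread)
    import Control.Concurrent.STM
    import Control.Monad (forever)

    -- Replace the process, keep the channel: the channel is the name.
    respawn :: TChan String -> ThreadId -> IO ThreadId
    respawn chan oldTid = do
        killThread oldTid
        forkIO . forever $ atomically (readTChan chan) >>= putStrLn
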
Note, though, that the interface is not 93 | completely clean: if the Haskell runtime figures out a thread is 94 | indefinitely blocked, it will in general throw an exception -- but I 95 | have not yet read the CML code in detail, so I do not know if this will 96 | happen. It depends on what MVar references a channel holds. 97 | 98 | Another view 99 | ------------ 100 | 101 | The hypergraph and location graph constitute what is sometimes called 102 | a *Bigraph*. This is not to be confused with the term Bi*partite* 103 | graph, which is a completely different concept but unfortunately is sometimes also called a 104 | bigraph. I somewhat resist the idea of using the same term 105 | for different concepts, but 106 | [work](http://www.itu.dk/~mikkelbu/research/bigraphsbib/index.html) 107 | done on this two-graph concept, presented in this post on 108 | [bigraphs](http://www.itu.dk/research/pls/wiki/index.php/A_Brief_Introduction_To_Bigraphs), 109 | suggests that the term is well established in this sense. 110 | 111 | The research seems different from what I need, 112 | however. [ITU](http://www.itu.dk) has done extensive research in 113 | bigraphical programming languages, letting them describe various known 114 | calculi (Lambda, Pi, etc.). I just want to use the descriptive power 115 | of the bigraphs to discriminate *location* from *communication*. 116 | 117 | -------------------------------------------------------------------------------- /src/Supervisor.hs: -------------------------------------------------------------------------------- 1 | -- | Erlang style supervisors for Haskell. 2 | -- Note that these are not yet really good enough for use in other projects. They 3 | -- are currently subject to change until I figure out what a nice interface will 4 | -- look like. At that moment they could be split off into their own package. 5 | {-# LANGUAGE ScopedTypeVariables #-} 6 | module Supervisor ( 7 | -- * Types 8 | Child(..) 9 | , Children 10 | , SupervisorMsg(..)
11 | , SupervisorChannel 12 | -- * Supervisor Initialization 13 | , allForOne 14 | , oneForOne 15 | -- * helper calls 16 | , pDie 17 | , defaultStopHandler 18 | ) 19 | where 20 | 21 | import Control.Concurrent 22 | import Control.Concurrent.STM 23 | import Control.Monad.State 24 | import Control.Monad.Reader 25 | 26 | import Prelude 27 | 28 | import Process 29 | 30 | data Child = Supervisor (SupervisorChannel -> IO (ThreadId, SupervisorChannel)) 31 | | Worker (SupervisorChannel -> IO ThreadId) 32 | 33 | instance Show Child where 34 | show (Supervisor _) = "Supervisor" 35 | show (Worker _) = "Worker" 36 | 37 | data SupervisorMsg = IAmDying ThreadId 38 | | PleaseDie ThreadId 39 | | SpawnNew Child 40 | deriving Show 41 | 42 | type SupervisorChannel = TChan SupervisorMsg 43 | type Children = [Child] 44 | 45 | data ChildInfo = HSupervisor ThreadId 46 | | HWorker ThreadId 47 | 48 | data RestartPolicy = AllForOne | OneForOne 49 | 50 | pDie :: SupervisorChannel -> IO () 51 | pDie supC = do 52 | tid <- myThreadId 53 | atomically $ writeTChan supC (IAmDying tid) 54 | 55 | data CF = CF { name :: String -- ^ Name of the supervisor 56 | , chan :: SupervisorChannel -- ^ Channel of the supervisor 57 | , parent :: SupervisorChannel -- ^ Channel of the parent supervisor 58 | , restartPolicy :: RestartPolicy } 59 | 60 | instance Logging CF where 61 | logName = name 62 | 63 | data ST = ST { childInfo :: [ChildInfo] } 64 | 65 | start :: RestartPolicy -> String -> Children -> SupervisorChannel -> IO (ThreadId, 66 | SupervisorChannel) 67 | start policy n children parentC = do 68 | c <- newTChanIO 69 | t <- spawnP (CF n c parentC policy) (ST []) (catchP (startup children) 70 | (defaultStopHandler parentC)) 71 | return (t, c) 72 | 73 | startup :: [Child] -> Process CF ST () 74 | startup children = do 75 | spawnedChildren <- mapM spawnChild children 76 | put $ ST (reverse spawnedChildren) 77 | forever eventLoop 78 | 79 | eventLoop :: Process CF ST () 80 | eventLoop = do 81 | mTid <- liftIO myThreadId 82 | pc <- asks parent 83 | ch <- asks chan 84 | m <- liftIO . atomically $ 85 | (readTChan ch >>= return . Left) `orElse` 86 | (readTChan pc >>= return . Right) 87 | case m of 88 | Left (IAmDying tid) -> handleIAmDying tid 89 | Left (SpawnNew chld) -> handleSpawnNew chld 90 | Right (PleaseDie tid) | tid == mTid -> handlePleaseDie 91 | _ -> return () -- Ignore these. Since the chan is duped, we get stray messages from above 92 | 93 | handleIAmDying :: ThreadId -> Process CF ST () 94 | handleIAmDying tid = do 95 | p <- asks restartPolicy 96 | case p of 97 | AllForOne -> do 98 | gets childInfo >>= mapM_ finChild 99 | stopP 100 | OneForOne -> 101 | pruneChild tid 102 | 103 | handleSpawnNew :: Child -> Process CF ST () 104 | handleSpawnNew chld = do 105 | nc <- spawnChild chld 106 | modify (\(ST cs) -> ST (nc : cs)) 107 | 108 | handlePleaseDie :: Process CF ST () 109 | handlePleaseDie = do 110 | gets childInfo >>= mapM_ finChild 111 | stopP 112 | 113 | 114 | pruneChild :: ThreadId -> Process CF ST () 115 | pruneChild tid = modify (\(ST cs) -> ST (filter chk cs)) 116 | where chk (HSupervisor t) = t == tid 117 | chk (HWorker t) = t == tid 118 | 119 | -- | A One-for-one supervisor is called with @oneForOne children parentCh@. It will spawn and run 120 | -- @children@ and be linked into the supervisor structure on @parentCh@. It returns the ThreadId 121 | -- of the supervisor itself and the Channel of which it is the controller. 122 | -- 123 | -- Should a process die, the one-for-one supervisor will do nothing about it. 
It will just record 124 | -- the death and let the other processes keep running. 125 | -- 126 | -- TODO: Restart policies. 127 | oneForOne :: String -> Children -> SupervisorChannel -> IO (ThreadId, SupervisorChannel) 128 | oneForOne = start OneForOne 129 | 130 | 131 | -- | Run a set of processes and do it once in the sense that if someone dies, 132 | -- no restart is attempted. We will just kill off everybody without any kind 133 | -- of prejudice. 134 | allForOne :: String -> Children -> SupervisorChannel -> IO (ThreadId, SupervisorChannel) 135 | allForOne = start AllForOne 136 | 137 | 138 | finChild :: ChildInfo -> Process CF ST () 139 | finChild (HWorker tid) = liftIO $ killThread tid 140 | finChild (HSupervisor tid) = do 141 | c <- asks chan 142 | liftIO . atomically $ writeTChan c (PleaseDie tid) 143 | 144 | spawnChild :: Child -> Process CF ST ChildInfo 145 | spawnChild (Worker proc) = do 146 | c <- asks chan 147 | nc <- liftIO . atomically $ dupTChan c 148 | tid <- liftIO $ proc nc 149 | return $ HWorker tid 150 | spawnChild (Supervisor proc) = do 151 | c <- asks chan 152 | nc <- liftIO . atomically $ dupTChan c 153 | (tid, _) <- liftIO $ proc nc 154 | return $ HSupervisor tid 155 | 156 | defaultStopHandler :: SupervisorChannel -> Process a b () 157 | defaultStopHandler supC = do 158 | t <- liftIO $ myThreadId 159 | liftIO . atomically $ writeTChan supC $ IAmDying t 160 | 161 | -------------------------------------------------------------------------------- /src/Combinatorrent.hs: -------------------------------------------------------------------------------- 1 | module Main (main) 2 | where 3 | 4 | import Control.Concurrent 5 | import Control.Concurrent.STM 6 | import Control.Monad 7 | import Control.Monad.State 8 | 9 | import Data.Maybe 10 | import Data.List 11 | 12 | import System.Environment 13 | import System.Random 14 | 15 | import System.Console.GetOpt 16 | import System.Directory (doesDirectoryExist) 17 | import System.FilePath () 18 | import System.Log.Logger 19 | import System.Log.Handler.Simple 20 | import System.IO as SIO 21 | 22 | import qualified Process.Console as Console 23 | import qualified Process.PeerMgr as PeerMgr 24 | import qualified Process.ChokeMgr as ChokeMgr (start) 25 | import qualified Process.Listen as Listen 26 | import qualified Process.DirWatcher as DirWatcher (start) 27 | import qualified Process.Status as Status (start, StatusChannel, PStat) 28 | import qualified Process.TorrentManager as TorrentManager (start, TorrentMgrChan, TorrentManagerMsg(..)) 29 | 30 | import Supervisor 31 | import Torrent 32 | import Version 33 | import qualified Test 34 | 35 | main :: IO () 36 | main = do args <- getArgs 37 | if "--tests" `elem` args 38 | then Test.runTests 39 | else progOpts args >>= run 40 | 41 | -- COMMAND LINE PARSING 42 | 43 | data Flag = Version | Debug | LogFile FilePath | WatchDir FilePath | StatFile FilePath 44 | deriving (Eq, Show) 45 | 46 | options :: [OptDescr Flag] 47 | options = 48 | [ Option ['V','?'] ["version"] (NoArg Version) "Show version number" 49 | , Option ['D'] ["debug"] (NoArg Debug) "Spew extra debug information" 50 | , Option [] ["logfile"] (ReqArg LogFile "FILE") "Choose a filepath on which to log" 51 | , Option ['W'] ["watchdir"] (ReqArg WatchDir "DIR") "Choose a directory to watch for torrents" 52 | , Option ['S'] ["statfile"] (ReqArg StatFile "FILE") "Choose a file to gather stats into" 53 | ] 54 | 55 | (~=) :: Flag -> Flag -> Bool 56 | Version ~= Version = True 57 | Debug ~= Debug = True 58 | LogFile _ ~= LogFile _ = True 
59 | WatchDir _ ~= WatchDir _ = True 60 | StatFile _ ~= StatFile _ = True 61 | _ ~= _ = False 62 | 63 | flag :: Flag -> [Flag] -> Maybe Flag 64 | flag x = find (x ~=) 65 | 66 | progOpts :: [String] -> IO ([Flag], [String]) 67 | progOpts args = do 68 | case getOpt Permute options args of 69 | (o,n,[] ) -> return (o, n) 70 | (_,_,errs) -> ioError (userError (concat errs ++ usageInfo header options)) 71 | where header = "Usage: Combinatorrent [OPTION...] file" 72 | 73 | run :: ([Flag], [String]) -> IO () 74 | run (flags, files) = do 75 | if Version `elem` flags 76 | then progHeader 77 | else case files of 78 | [] | isNothing $ flag (WatchDir "") flags -> putStrLn "No torrentfile input" 79 | names -> progHeader >> download flags names 80 | 81 | progHeader :: IO () 82 | progHeader = putStrLn $ "This is Combinatorrent \x2620 version " ++ version ++ "\n" ++ 83 | " For help type 'help'\n" 84 | 85 | setupLogging :: [Flag] -> IO () 86 | setupLogging flags = do 87 | fLog <- case flag (LogFile "") flags of 88 | Nothing -> streamHandler SIO.stdout DEBUG 89 | Just (LogFile fp) -> fileHandler fp DEBUG 90 | Just _ -> error "Impossible match" 91 | when (Debug `elem` flags) 92 | (updateGlobalLogger rootLoggerName 93 | (setHandlers [fLog] . (setLevel DEBUG))) 94 | 95 | setupDirWatching :: [Flag] -> TorrentManager.TorrentMgrChan -> IO [Child] 96 | setupDirWatching flags watchC = do 97 | case flag (WatchDir "") flags of 98 | Nothing -> return [] 99 | Just (WatchDir dir) -> do 100 | ex <- doesDirectoryExist dir 101 | if ex 102 | then do return [ Worker $ DirWatcher.start dir watchC ] 103 | else do putStrLn $ "Directory does not exist, not watching" 104 | return [] 105 | Just _ -> error "Impossible match" 106 | 107 | setupStatus :: [Flag] -> Status.StatusChannel -> TVar [Status.PStat] -> Child 108 | setupStatus flags statusC stv = 109 | case flag (StatFile "") flags of 110 | Nothing -> Worker $ Status.start Nothing statusC stv 111 | Just (StatFile fn) -> Worker $ Status.start (Just fn) statusC stv 112 | Just _ -> error "Impossible match" 113 | 114 | generatePeerId :: IO PeerId 115 | generatePeerId = do 116 | gen <- getStdGen 117 | return $ mkPeerId gen 118 | 119 | download :: [Flag] -> [String] -> IO () 120 | download flags names = do 121 | setupLogging flags 122 | watchC <- liftIO newTChanIO 123 | workersWatch <- setupDirWatching flags watchC 124 | -- setup channels 125 | statusC <- liftIO $ newTChanIO 126 | waitC <- liftIO $ newEmptyTMVarIO 127 | supC <- liftIO newTChanIO 128 | pmC <- liftIO $ newTChanIO 129 | chokeC <- liftIO $ newTChanIO 130 | rtv <- atomically $ newTVar [] 131 | stv <- atomically $ newTVar [] 132 | debugM "Main" "Created channels" 133 | pid <- generatePeerId 134 | (tid, _) <- allForOne "MainSup" 135 | (workersWatch ++ 136 | [ Worker $ Console.start waitC statusC 137 | , Worker $ TorrentManager.start watchC statusC stv chokeC pid pmC 138 | , setupStatus flags statusC stv 139 | , Worker $ PeerMgr.start pmC pid chokeC rtv 140 | , Worker $ ChokeMgr.start chokeC rtv 100 -- 100 is upload rate in KB 141 | , Worker $ Listen.start defaultPort pmC 142 | ]) supC 143 | atomically $ writeTChan watchC (map TorrentManager.AddedTorrent names) 144 | _ <- atomically $ takeTMVar waitC 145 | infoM "Main" "Closing down, giving processes 10 seconds to cool off" 146 | atomically $ writeTChan supC (PleaseDie tid) 147 | threadDelay $ 10*1000000 148 | infoM "Main" "Done..." 
149 | return () 150 | 151 | -------------------------------------------------------------------------------- /src/Process/Status.hs: -------------------------------------------------------------------------------- 1 | -- | The status code runs a Status Process. This process keeps track 2 | -- of a number of interval valies for a given torrent file and it 3 | -- periodically updates the tracker process with the relevant 4 | -- information about data uploaded, downloaded and how much is 5 | -- left. The tracker is then responsible for using this data 6 | -- correctly to tell the tracker what to do 7 | {-# LANGUAGE FlexibleInstances #-} 8 | module Process.Status ( 9 | -- * Types 10 | StatusMsg(..) 11 | , PStat(..) 12 | -- * Channels 13 | , StatusChannel 14 | -- * State 15 | , StatusState(uploaded, downloaded, left) 16 | -- * Interface 17 | , start 18 | ) 19 | where 20 | 21 | import Control.Concurrent 22 | import Control.Concurrent.STM 23 | import Control.DeepSeq 24 | import Control.Exception (assert) 25 | 26 | import Control.Monad.Reader 27 | import Control.Monad.State hiding (state) 28 | 29 | import Data.IORef 30 | import qualified Data.Map as M 31 | 32 | import Prelude hiding (log) 33 | 34 | import Channels 35 | import Process 36 | import Process.Timer 37 | import Supervisor 38 | import Torrent 39 | import Version 40 | 41 | data StatusMsg = TrackerStat { trackInfoHash :: InfoHash 42 | , trackIncomplete :: Maybe Integer 43 | , trackComplete :: Maybe Integer } 44 | | CompletedPiece InfoHash Integer 45 | | InsertTorrent InfoHash Integer TrackerChannel 46 | | RemoveTorrent InfoHash 47 | | TorrentCompleted InfoHash 48 | | RequestStatus InfoHash (TMVar StatusState) 49 | | RequestAllTorrents (TMVar [(InfoHash, StatusState)]) 50 | | StatusTimerTick 51 | 52 | data PStat = PStat { pInfoHash :: InfoHash 53 | , pUploaded :: Integer 54 | , pDownloaded :: Integer } 55 | 56 | type StatusChannel = TChan StatusMsg 57 | 58 | data CF = CF { statusCh :: StatusChannel, 59 | statusTV :: TVar [PStat] } 60 | 61 | instance Logging CF where 62 | logName _ = "Process.Status" 63 | 64 | type ST = M.Map InfoHash StatusState 65 | 66 | data StatusState = SState 67 | { uploaded :: Integer 68 | , downloaded :: Integer 69 | , left :: Integer 70 | , incomplete :: Maybe Integer 71 | , complete :: Maybe Integer 72 | , state :: TorrentState 73 | , trackerMsgCh :: TrackerChannel 74 | } 75 | 76 | instance NFData StatusState where 77 | rnf (SState up down l inc comp st _) = 78 | rnf up `seq` rnf down `seq` rnf l `seq` rnf inc `seq` rnf comp `seq` rnf st `seq` () 79 | 80 | gatherStats :: (Integer, Integer) -> [(String, String)] 81 | gatherStats (upload, download) = 82 | [("uploaded", show upload), ("downloaded", show download), 83 | ("version", version)] 84 | 85 | instance Show StatusState where 86 | show (SState up down l inc comp st _) = concat 87 | ["{ Uploaded: " ++ show up ++ "\n" 88 | ," Downloaded: " ++ show down ++ "\n" 89 | ," Left: " ++ show l ++ "\n" 90 | ," State: " ++ show st ++ "\n" 91 | ," Complete: " ++ show comp ++ "\n" 92 | ," Incomplete: " ++ show inc ++ " }"] 93 | 94 | -- | Start a new Status process with an initial torrent state and a 95 | -- channel on which to transmit status updates to the tracker. 
96 | start :: Maybe FilePath -> StatusChannel -> TVar [PStat] -> SupervisorChannel -> IO ThreadId 97 | start fp statusC tv supC = do 98 | r <- newIORef (0,0) 99 | _ <- registerSTM 5 statusC StatusTimerTick 100 | spawnP (CF statusC tv) M.empty 101 | ({-# SCC "Status" #-} cleanupP (pgm r) (defaultStopHandler supC) (cleanup r)) 102 | where 103 | cleanup r = do 104 | st <- liftIO $ readIORef r 105 | case fp of 106 | Nothing -> return () 107 | Just fpath -> liftIO $ writeFile fpath (show . gatherStats $ st) 108 | pgm r = do 109 | fetchUpdates r 110 | ch <- asks statusCh 111 | x <- liftIO . atomically $ readTChan ch 112 | recvMsg x 113 | pgm r 114 | 115 | newMap :: Integer -> TrackerChannel -> StatusState 116 | newMap l trackerMsgC = 117 | SState 0 0 l Nothing Nothing (if l == 0 then Seeding else Leeching) trackerMsgC 118 | 119 | recvMsg :: StatusMsg -> Process CF ST () 120 | recvMsg msg = 121 | case msg of 122 | TrackerStat ih ic c -> do 123 | modify (\s -> M.adjust (\st -> st { incomplete = ic, complete = c }) ih s) 124 | CompletedPiece ih bytes -> do 125 | modify (\s -> M.adjust (\st -> st { left = (left st) - bytes }) ih s) 126 | InsertTorrent ih l trackerMsgC -> 127 | modify (\s -> M.insert ih (newMap l trackerMsgC) s) 128 | RemoveTorrent ih -> modify (\s -> M.delete ih s) 129 | RequestStatus ih v -> do 130 | s <- get 131 | case M.lookup ih s of 132 | Nothing -> fail "Unknown InfoHash" 133 | Just st -> liftIO . atomically $ putTMVar v st 134 | RequestAllTorrents v -> do 135 | s <- get 136 | liftIO . atomically $ putTMVar v (M.toList s) 137 | TorrentCompleted ih -> do 138 | mp <- get 139 | let q = M.lookup ih mp 140 | ns <- maybe (fail "Unknown Torrent") return q 141 | assert (left ns == 0) (return ()) 142 | liftIO . atomically $ writeTChan (trackerMsgCh ns) Complete 143 | modify (\s -> M.insert ih (ns { state = Seeding}) s) 144 | StatusTimerTick -> do ch <- asks statusCh 145 | registerSTM 5 ch StatusTimerTick >> return () 146 | 147 | 148 | fetchUpdates :: IORef (Integer, Integer) -> Process CF ST () 149 | fetchUpdates r = do 150 | tv <- asks statusTV 151 | updates <- liftIO . atomically $ do 152 | updates <- readTVar tv 153 | writeTVar tv [] 154 | return updates 155 | mapM_ (\(PStat ih up down) -> do 156 | (u, d) <- liftIO $ readIORef r 157 | let nup = u + up 158 | ndn = d + down 159 | liftIO $ nup `deepseq` ndn `deepseq` writeIORef r (nup, ndn) 160 | s <- get 161 | let adjusted = M.adjust (\st -> 162 | st { uploaded = (uploaded st) + up 163 | , downloaded = (downloaded st) + down }) ih s 164 | deepseq adjusted (put adjusted)) updates 165 | 166 | -------------------------------------------------------------------------------- /doc/process-transformers.lhs: -------------------------------------------------------------------------------- 1 | Implementing processes 2 | ====================== 3 | 4 | The Haskell Bittorrent project is evolving at a steady state these days. In the last couple of days, we have implemented most of the code relevant for carrying out choking of peers. Choking is the process by which you only communicate to a few peers at a time. Thus TCP/IP congestion can be avoided which drives up the download and upload rates. This post is not on this part however, which must wait a bit. 5 | 6 | A fellow dane, Thomas Christensen, heeded my call and did some hlint runs over the code. Hopefully, we'll see more work from him ([github](http://github.com/thomaschrstnsn)). 
Alex Mason has added even more parsing stuff through Cereal to the code, fixed a number of bugs in the parser and improved its general state. His venture is described on his blog ([here](http://random.axman6.com/blog/)). 7 | 8 | Processes 9 | --------- 10 | 11 | I will be talking about processes in this post. When haskell-torrent started, our processes were simply "things spawned in the IO monad". The approach works, but it quickly becomes inadequate for several reasons. In Haskell and FP in general, a lot of power stems from the idea that we can write a large set of small building blocks and then compose them together to form increasingly larger and larger blocks as we go. When composing, we use a fairly small number of helpers -- readily present in the standard libraries. 12 | 13 | When running in IO, we quickly end up with a lot of state. This can of course be passed around by "hand" and tail-calls. Unfortunately, this means we will end up using a lot of our precious coding time doing just exactly that. To optimize, we need a way to reflect away configuration and state when we don't need it, and reify the information at certain points in the program where it is necessary to know the state. 14 | 15 | The ubiquitious tool in Haskell for this are Monads. Not a single monad like IO, but a for-the-case relevant monad built from monad transformers. A couple of days ago, I installed [XMonad](http://xmonad.org) by Spencer Janssen, Don Stewart, Jason Creighton and many more. This is a window manager -- but surprisingly, it needs to solve some problems similar to the one in haskell-torrent. By inspiration from XMonad, we define 16 | 17 | > newtype Process a b c = Process (ReaderT a (StateT b IO) c) 18 | > deriving (Functor, Monad, MonadIO, MonadState b, MonadReader a, Typeable) 19 | 20 | That is, a *Process* is a type. It contains some configuration data *a*, an internal state *b* and is in the process of evaluating to a value of type *c*. We use a Reader transformer so we can *ask* for configuration data when we need it. We do not expect this configuration data to be altered when the process runs. A State Transformer takes care of the internal state of the process. Finally, we let the underlying monad be IO. This gives us access to CML and the outside world. 21 | 22 | We let GHC derive a large set of things automatically (using several extensions in the process). This gives an easier way to manipulate the state when the process is running. 23 | 24 | Running a Process is easy: 25 | 26 | > runP :: a -> b -> Process a b c -> IO (c, b) 27 | > runP c st (Process p) = runStateT (runReaderT p c) st 28 | 29 | which is exactly like in XMonad. Spawning a new process is also fairly easy: 30 | 31 | > spawnP :: a -> b -> Process a b () -> IO ThreadId 32 | > spawnP c st p = spawn proc 33 | > where proc = do runP c st p 34 | > return () 35 | 36 | In general different processes will have different configurations *a*. These will usually contain the channels on which the process can communicate. We then define a type class 37 | 38 | > class Logging a where 39 | > getLogger :: a -> LogChannel 40 | > 41 | > instance Logging LogChannel where 42 | > getLogger = id 43 | 44 | of types that contain a logger channel. 
This means we can define a generic log function like this: 45 | 46 | > log :: Logging a => String -> Process a b () 47 | > log msg = do 48 | > logC <- asks getLogger 49 | > liftIO $ logMsg logC msg 50 | 51 | Type classes are a magnificent tool when we want to coerce a general function on top of different types. Many of our configurations in the client will instance the *Logging* class and then the log function knows how to access the logger in the Reader. 52 | 53 | What does this buy us 54 | --------------------- 55 | 56 | The advantage of doing this change on the code is twofold: First, the amount of parameter passing is considerably reduced. Reflection into the monad solves this problem. We are now able to compose easier. Function composition is considerably harder when there are many parameters abound. With the change, preliminary restructuring of the Peer process shows a much simpler flow. Also, there are now ample refactoring opportunities available with the change. 57 | 58 | Second, the monad means we can use locality much more to our advantage. A common idiom is to modify the state of the process or to retrieve the current state for query. This now happens locally at the point where it is needed. Before, we might have needed to pass a parameter through several functions and then use it. 59 | 60 | What next? 61 | ---------- 62 | 63 | There are a small number of things that needs to be addressed before we can claim that the client is a full bittorrent client: 64 | 65 | - The client needs to correctly handle the concept of *interest*. It must tell other clients if it is interested in the pieces they have at their disposal for transmission. I have some preliminary code for doing this. 66 | - The client needs to correctly tell the tracker how many bytes it uploaded and downloaded. This measure is needed on many private trackers as they require people to upload data back. 67 | - The client needs to be better at choosing the next eligible piece. Choosing one randomly is good enough. 68 | - The client needs to handle multi-file torrents. It is not as hard as it may sound -- the only part of the system that needs to know about files is the code handling the file system. All other parts can just keep on transferring pieces. 69 | - For choking to work correctly, we must know how fast we are currently transferring to a peer. This is an interesting little problem if somebody feels their curiosity tickled :) 70 | - We currently take space proportional to torrent size due to our SHA1 calculation being slow and not use a file descriptor. Research into a faster SHA1 library would be really beneficial. 71 | - We need to accept incoming connections. The system only connects outward at the moment. 72 | 73 | And of course, it needs some testing in a non-lab setting. Currently it can seed and leech, but my setup is very simple: Opentracker and rtorrent on another computer. 74 | 75 | The main github repository for haskell-torrent is [here](http://github.com/jlouis/haskell-torrent/). 76 | 77 | -------------------------------------------------------------------------------- /src/Data/PieceSet.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | -- | Module for the representation of PieceSets. 
Exist so we can abstract on the implementation later 3 | module Data.PieceSet 4 | ( PieceSet 5 | , new 6 | , size 7 | , full 8 | , delete 9 | , Data.PieceSet.null 10 | , insert 11 | , intersection 12 | , intersects 13 | , member 14 | , fromList 15 | , toList 16 | , Data.PieceSet.freeze 17 | -- * Tests 18 | , testSuite 19 | ) 20 | where 21 | 22 | import Control.Applicative 23 | import Control.Monad 24 | import Control.Monad.Trans 25 | import Data.Array.IO 26 | import Data.Array.Unboxed ((!), UArray) 27 | import qualified Data.Foldable as F 28 | import Data.List ((\\), partition, sort, null) 29 | import Prelude hiding (null) 30 | 31 | import Test.Framework 32 | import Test.Framework.Providers.HUnit 33 | import Test.HUnit hiding (Path, Test) 34 | import TestInstance() -- Pull arbitraries 35 | 36 | import Torrent 37 | 38 | newtype PieceSet = PieceSet { unPieceSet :: IOUArray Int Bool } 39 | 40 | new :: MonadIO m => Int -> m PieceSet 41 | new n = {-# SCC "Data.PieceSet/new" #-} 42 | liftIO $ PieceSet <$> newArray (0, n-1) False 43 | 44 | all :: MonadIO m => (Bool -> Bool) -> PieceSet -> m Bool 45 | all f ps = liftIO $ do 46 | elems <- getElems $ unPieceSet ps 47 | return $ Prelude.all f elems 48 | 49 | null :: MonadIO m => PieceSet -> m Bool 50 | null = Data.PieceSet.all (==False) 51 | 52 | full :: MonadIO m => PieceSet -> m Bool 53 | full = {-# SCC "Data.PieceSet/full" #-} Data.PieceSet.all (==True) 54 | 55 | insert :: MonadIO m => Int -> PieceSet -> m () 56 | insert n (PieceSet ps) = {-# SCC "Data.PieceSet/insert" #-} 57 | liftIO $ writeArray ps n True 58 | 59 | size :: MonadIO m => PieceSet -> m Int 60 | size (PieceSet arr) = {-# SCC "Data.PieceSet/size" #-} 61 | liftIO $ do 62 | (l, u) <- getBounds arr 63 | let walk x acc | x > u = return acc 64 | | otherwise = 65 | readArray arr x >>= \p -> 66 | if p then walk (x+1) (acc+1) else walk (x+1) acc 67 | walk l 0 68 | 69 | member :: MonadIO m => Int -> PieceSet -> m Bool 70 | member n (PieceSet arr) = {-# SCC "Data.PieceSet/member" #-} 71 | liftIO $ readArray arr n 72 | 73 | delete :: MonadIO m => Int -> PieceSet -> m () 74 | delete n (PieceSet arr) = {-# SCC "Data.PieceSet/delete" #-} liftIO $ 75 | writeArray arr n False 76 | 77 | intersection :: MonadIO m => PieceSet -> PieceSet -> m [Int] 78 | intersection (PieceSet arr1) (PieceSet arr2) = liftIO $ do 79 | eqSize <- (==) <$> getBounds arr1 <*> getBounds arr2 80 | if not eqSize 81 | then error "Wrong intersection sizes" 82 | else do 83 | elems <- getAssocs arr1 84 | F.foldlM mem [] elems 85 | where 86 | mem ls (_, False) = return ls 87 | mem ls (i, True) = do 88 | m <- readArray arr2 i 89 | return $ if m then (i : ls) else ls 90 | 91 | intersects :: MonadIO m => PieceSet -> PieceSet -> m Bool 92 | intersects (PieceSet arr1) (PieceSet arr2) = liftIO $ do 93 | (l, u) <- getBounds arr1 94 | let walk x | x > u = return False 95 | | otherwise = do 96 | a <- readArray arr1 x 97 | b <- readArray arr2 x 98 | if a && b then return True else walk (x+1) 99 | walk l 100 | 101 | fromList :: MonadIO m => Int -> [Int] -> m PieceSet 102 | fromList n elems = {-# SCC "Data.PieceSet/fromList" #-} liftIO $ do 103 | nArr <- newArray (0, n-1) False 104 | mapM_ (flip (writeArray nArr) True) elems 105 | return $ PieceSet nArr 106 | 107 | toList :: MonadIO m => PieceSet -> m [Int] 108 | toList (PieceSet arr) = {-# SCC "Data.PieceSet/toList" #-} liftIO $ do 109 | elems <- getAssocs arr 110 | return [i | (i, e) <- elems, e == True] 111 | 112 | freeze :: MonadIO m => PieceSet -> m (PieceNum -> Bool) 113 | freeze 
(PieceSet ps) = do 114 | frozen <- liftIO $ (Data.Array.IO.freeze ps :: IO (UArray Int Bool)) 115 | return $ (frozen !) 116 | 117 | -- Tests 118 | 119 | testSuite :: Test 120 | testSuite = testGroup "Data/PieceSet" 121 | [ testCase "New/Size" testNewSize 122 | , testCase "Full" testFull 123 | , testCase "Build" testBuild 124 | , testCase "Full" testFull 125 | , testCase "Intersection" testIntersect 126 | , testCase "Membership" testMember 127 | , testCase "Insert/Delete" testInsertDelete 128 | ] 129 | 130 | testNewSize :: Assertion 131 | testNewSize = do 132 | a <- new 1337 133 | sz <- size a 134 | assertEqual "For a new PieceSet" sz 0 135 | insert 3 a 136 | insert 5 a 137 | sz2 <- size a 138 | assertEqual "For inserted" sz2 2 139 | 140 | testFull :: Assertion 141 | testFull = do 142 | let maxElem = 1337 143 | ps <- new maxElem 144 | _ <- forM [0..maxElem-1] (flip insert ps) 145 | tst <- liftM and $ mapM (flip member ps) [0..maxElem-1] 146 | assertBool "for a full PieceSet" tst 147 | 148 | testBuild :: Assertion 149 | testBuild = do 150 | let nums = [0..1336] 151 | m = 1336 + 1 152 | ps <- fromList m nums 153 | sz <- size ps 154 | assertEqual "for size" sz (length nums) 155 | 156 | testIntersect :: Assertion 157 | testIntersect = do 158 | let (evens, odds) = partition (\x -> x `mod` 2 == 0) [0..99] 159 | evPS <- fromList 100 evens 160 | oddPS <- fromList 100 odds 161 | is1 <- intersection evPS oddPS 162 | is1' <- intersects evPS oddPS 163 | assertBool "for intersection" (Data.List.null is1) 164 | assertBool "for intersects" (not is1') 165 | ps1 <- fromList 10 [1,2,3,4,9] 166 | ps2 <- fromList 10 [2,5,4,8,9] 167 | is2 <- intersection ps1 ps2 168 | is2' <- intersects ps1 ps2 169 | assertBool "for simple intersection" (sort is2 == [2,4,9]) 170 | assertBool "for simple intersects" is2' 171 | 172 | testMember :: Assertion 173 | testMember = do 174 | let evens = filter (\x -> x `mod` 2 == 0) [0..999] 175 | m = 1000 176 | notThere = [0..999] \\ evens 177 | ps <- fromList m evens 178 | a <- liftM and $ mapM (flip member ps) evens 179 | b <- liftM and $ mapM (liftM not . flip member ps) notThere 180 | assertBool "for members" a 181 | assertBool "for non-members" b 182 | 183 | testInsertDelete :: Assertion 184 | testInsertDelete = do 185 | ps <- new 10 186 | insert 3 ps 187 | insert 4 ps 188 | assertBool "Ins/del #1" =<< member 3 ps 189 | assertBool "Ins/del #2" =<< liftM not (member 5 ps) 190 | delete 3 ps 191 | assertBool "Ins/del #3" =<< member 4 ps 192 | assertBool "Ins/del #4" =<< liftM not (member 3 ps) 193 | insert 5 ps 194 | assertBool "Ins/del #5" =<< member 5 ps 195 | 196 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Combinatorrent - a bittorrent client. 2 | ===================================== 3 | 4 | [![Build Status](https://secure.travis-ci.org/jlouis/combinatorrent.svg?branch=master)](http://travis-ci.org/jlouis/combinatorrent) 5 | 6 | Introduction 7 | ---------- 8 | 9 | This is a bittorrent client. I am the introduction document and I need to be 10 | written by some generous soul! 11 | 12 | Installation 13 | ------------ 14 | 15 | Here is what I do to install haskell torrrent locally on my machine: 16 | 17 | cabal install --prefix=$HOME --user 18 | 19 | Since we are using the magnificient cabal, this is enough to install haskell 20 | torrent in our $HOME/bin directory. 
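If you would rather keep the dependencies isolated from your user package database, a cabal sandbox should also work (a sketch, assuming cabal-install 1.18 or newer, run from the repository root):

    cabal sandbox init
    cabal install --only-dependencies
    cabal build

The binary then ends up under dist/ instead of in $HOME/bin.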
21 | 22 | Usage 23 | ----------------- 24 | 25 | Combinatorrent can currently only do one very simple thing. If you call it with 26 | 27 | Combinatorrent foo.torrent 28 | 29 | then it will begin downloading the file in foo.torrent to the current 30 | directory via the Bittorrent protocol. 31 | 32 | Protocol support 33 | ---------------- 34 | 35 | Currently haskell-torrent supports the following BEPs (See the 36 | [BEP Process](http://www.bittorrent.org/beps/bep_0000.html) document for an 37 | explanation of these) 38 | 39 | - 0003, 0004, 0006, 0010, 0020, 40 | 41 | Combinatorrent implicitly supports these extensions 42 | 43 | - 0027: Support by the virtue of only supporting a single tracker and no 44 | DHT. 45 | 46 | Partially supported extensions: 47 | 48 | - 0007: Combinatorrent understands and uses the "peers6" response from 49 | the tracker to connect clients. On the other hand, it does nothing to 50 | provide the "ipv4=" and "ipv6=" keys on tracker requests. As such, it 51 | can be claimed that 0007 support is available, as everything we left 52 | out is only qualified as MAY. 53 | 54 | - 0023: Combinatorrent supports the "compact" response only, although it 55 | is explicitly stated that the client must support both. In practice it 56 | has little impact as all modern trackers will only return compact 57 | responses anyway. 58 | 59 | Combinatorrent is not supporting these BEPs, but strives to do so one day: 60 | 61 | - 0005, 0009, 0012, 0015, 0016, 0017, 0018, 0019, 0021, 0022, 62 | 0024, 0026, 0028, 0029, 0030, 0031, 0032 63 | 64 | Debugging 65 | --------- 66 | 67 | For debugging, jlouis tends to use the following: 68 | 69 | make conf build test 70 | 71 | This builds Combinatorrent with the *Debug* flag set and also builds the 72 | software with profiling by default so it is easy to hunt down performance 73 | regressions. It also runs the internal test-suite for various values. There 74 | are a couple of interesting targets in the top-level Makefile 75 | 76 | Reading material for hacking Combinatorrent: 77 | -------------------------------------------- 78 | 79 | - [Protocol specification - BEP0003](http://www.bittorrent.org/beps/bep_0003.html): 80 | This is the original protocol specification, tracked into the BEP 81 | process. It is worth reading because it explains the general overview 82 | and the precision with which the original protocol was written down. 83 | 84 | - [Bittorrent Enhancement Process - BEP0000](http://www.bittorrent.org/beps/bep_0000.html) 85 | The BEP process is an official process for adding extensions on top of 86 | the BitTorrent protocol. It allows implementors to mix and match the 87 | extensions making sense for their client and it allows people to 88 | discuss extensions publicly in a forum. It also provisions for the 89 | deprecation of certain features in the long run as they prove to be of 90 | less value. 91 | 92 | - [wiki.theory.org](http://wiki.theory.org/Main_Page) 93 | An alternative description of the protocol. This description is in 94 | general much more detailed than the BEP structure. It is worth a read 95 | because it acts somewhat as a historic remark and a side channel. Note 96 | that there are some commentary on these pages which can be disputed 97 | quite a lot. 98 | 99 | - ["Supervisor Behaviour"](http://erlang.org/doc/design_principles/sup_princ.html) 100 | From the Erlang documentation. How the Erlang Supervisor behaviour 101 | works. 
The Supervisor and process structure of Combinatorrent is 102 | somewhat inspired by the Erlang ditto. 103 | 104 | Source code Hierarchy 105 | --------------------- 106 | 107 | - **Data**: Data structures. 108 | - **Queue**: Functional queues. Standard variant with two lists. 109 | - **PendingSet**: A wrapper around Data.PSQueue for tracking how 110 | common a piece is. 111 | - **PieceSet**: BitArrays of pieces and their operations. 112 | 113 | - **Process**: Process definitions for the different processes comprising 114 | Combinatorrent 115 | - **ChokeMgr**: Manages choking and unchoking of peers, based upon the current speed of the peer 116 | and its current state. Global for multiple torrents. 117 | - **Console**: Simple console process. Only responds to 'quit' at the moment. 118 | - **DirWatcher**: Watches a directory and adds any torrent present in 119 | it. 120 | - **FS**: Process managing the file system. 121 | - **Listen**: Not used at the moment. Step towards listening sockets. 122 | - **Peer**: Several process definitions for handling peers. Two for sending, one for receiving 123 | and one for controlling the peer and handle the state. 124 | - **PeerMgr**: Management of a set of peers for a single torrent. 125 | - **PieceMgr**: Keeps track of what pieces have been downloaded and what are missing. Also hands 126 | out blocks for downloading to the peers. 127 | - **Status**: Keeps track of uploaded/downloaded/left bytes for a single torrent. Could be globalized. 128 | - **Timer**: Timer events. 129 | - **TorrentManager**: Manages torrents at the top-level. 130 | - **Tracker**: Communication with the tracker. 131 | 132 | - **Protocol**: Modules for interacting with the various bittorrent protocols. 133 | - **BCode**: The bittorrent BCode coding. Used by several protocols. 134 | - **Wire**: The protocol used for communication between peers. 135 | 136 | - **Top Level**: 137 | - **Channels**: Various Channel definitions. 138 | - **Combinatorrent**: Main entry point to the code. Sets up processes. 139 | - **Digest**: SHA1 digests as used in the bittorrent protocol. 140 | - **FS**: Low level Filesystem code. Interacts with files. 141 | - **Process**: Code for Erlang-inspired processes. 142 | - **RateCalc**: Rate calculations for a network socket. We use this to keep track of the 143 | current speed of a peer in one direction. 144 | - **Supervisor**: Erlang-inspired Supervisor processes. 145 | - **Test.hs**: Code for test-framework 146 | - **TestInstance.hs**: Various helper instances not present in the test framework by default 147 | - **Torrent**: Various helpers and types for Torrents. 148 | - **Tracer**: Code for simple "ring"-like tracing. 149 | - **Version.hs.in**: Generates **Version.hs** via the configure script. 150 | 151 | 153 | -------------------------------------------------------------------------------- /src/Process/Peer/SenderQ.hs: -------------------------------------------------------------------------------- 1 | module Process.Peer.SenderQ 2 | ( SenderQMsg(..) 
3 | , start 4 | ) 5 | where 6 | 7 | import Control.Concurrent 8 | import Control.Concurrent.STM 9 | 10 | import Control.Monad.Reader 11 | import Control.Monad.State 12 | 13 | import Prelude hiding (log) 14 | 15 | import qualified Data.ByteString as B 16 | import qualified Data.ByteString.Lazy as L 17 | import Data.List (foldl') 18 | 19 | import Channels 20 | import Process 21 | import Process.FS hiding (start) 22 | import qualified Data.Queue as Q 23 | import Supervisor 24 | import Torrent 25 | import Protocol.Wire 26 | 27 | -- | Messages we can send to the Send Queue 28 | data SenderQMsg = SenderQCancel PieceNum Block -- ^ Peer requested that we cancel a piece 29 | | SenderQM Message -- ^ We want to send the Message to the peer 30 | | SenderQPiece PieceNum Block -- ^ Request for a piece transmit 31 | | SenderOChoke -- ^ We want to choke the peer 32 | | SenderQRequestPrune PieceNum Block -- ^ Prune SendQueue of this (pn, blk) pair 33 | 34 | data CF = CF { sqIn :: TChan SenderQMsg 35 | , sqOut :: TMVar L.ByteString 36 | , peerCtlCh :: TChan MsgTy 37 | , readBlockTV :: TMVar B.ByteString 38 | , fsCh :: FSPChannel 39 | , fastExtension :: Bool 40 | } 41 | 42 | data ST = ST { outQueue :: !(Q.Queue (Either Message (PieceNum, Block))) 43 | , bytesTransferred :: !Int 44 | } 45 | 46 | instance Logging CF where 47 | logName _ = "Process.Peer.SendQueue" 48 | 49 | -- | sendQueue Process, simple version. 50 | -- TODO: Split into fast and slow. 51 | start :: [Capabilities] -> TChan SenderQMsg -> TMVar L.ByteString -> TChan MsgTy 52 | -> FSPChannel -> SupervisorChannel -> IO ThreadId 53 | start caps inC outC bandwC fspC supC = do 54 | rbtv <- liftIO newEmptyTMVarIO 55 | spawnP (CF inC outC bandwC rbtv fspC 56 | (Fast `elem` caps)) (ST Q.empty 0) 57 | ({-# SCC "SenderQ" #-} catchP pgm 58 | (defaultStopHandler supC)) 59 | 60 | pgm :: Process CF ST () 61 | pgm = {-# SCC "Peer.SendQueue" #-} do 62 | q <- gets outQueue 63 | l <- gets bytesTransferred 64 | -- Gather together events which may trigger 65 | when (l > 0) rateUpdateEvent 66 | ic <- asks sqIn 67 | ov <- asks sqOut 68 | r <- case Q.first q of 69 | Nothing -> liftIO $ atomically (readTChan ic >>= return . Right) 70 | Just r -> do p <- case r of 71 | Left m -> return m 72 | Right (pn, blk) -> do d <- readBlock pn blk 73 | return $ Piece pn (blockOffset blk) d 74 | let bs = encodePacket p 75 | sz = fromIntegral $ L.length bs 76 | liftIO . atomically $ 77 | (putTMVar ov bs >> return (Left sz)) `orElse` 78 | (readTChan ic >>= return . 
Right) 79 | case r of 80 | Left sz -> 81 | modify (\s -> s { bytesTransferred = bytesTransferred s + sz 82 | , outQueue = Q.remove (outQueue s)}) 83 | Right m -> 84 | case m of 85 | SenderQM msg -> modifyQ (Q.push $ Left msg) 86 | SenderQPiece n blk -> modifyQ (Q.push $ Right (n, blk)) 87 | SenderQCancel n blk -> do 88 | fe <- asks fastExtension 89 | if fe 90 | then do 91 | piece <- partitionQ (pickPiece n blk) 92 | case piece of 93 | [] -> return () -- Piece must have been sent 94 | [_] -> modifyQ (Q.push (Left $ RejectRequest n blk)) 95 | ps -> fail $ "Impossible case, SenderQCancel " ++ show (length ps) 96 | else modifyQ (Q.filter (filterPiece n blk)) 97 | SenderOChoke -> do 98 | fe <- asks fastExtension 99 | if fe 100 | then do 101 | -- In the fast extension, we explicitly reject all pieces 102 | pieces <- partitionQ filterAllPiece 103 | modifyQ (Q.push $ Left Choke) 104 | let rejects = map (\(Right (pn, blk)) -> Left $ RejectRequest pn blk) 105 | pieces 106 | modifyQ (flip (foldl' (flip Q.push)) rejects) 107 | else do modifyQ (Q.filter filterAllPiece) 108 | modifyQ (Q.push $ Left Choke) 109 | SenderQRequestPrune n blk -> do 110 | fe <- asks fastExtension 111 | piece <- partitionQ (pickRequest n blk) 112 | case piece of 113 | [] -> modifyQ (Q.push (Left $ Cancel n blk)) -- Request must have been sent 114 | [_] -> if fe 115 | then modifyQ -- This is a hack for now 116 | (Q.push (Left $ Cancel n blk) . 117 | Q.push (Left $ Request n blk)) 118 | else return () 119 | ps -> fail $ "Impossible case, SenderQRequestPrune " 120 | ++ show ps ++ ", " ++ show fe 121 | pgm 122 | 123 | rateUpdateEvent :: Process CF ST () 124 | rateUpdateEvent = {-# SCC "Peer.SendQ.rateUpd" #-} do 125 | l <- gets bytesTransferred 126 | bwc <- asks peerCtlCh 127 | liftIO . atomically $ writeTChan bwc (FromSenderQ l) 128 | modify (\s -> s { bytesTransferred = 0 }) 129 | 130 | -- The type of the Outgoing queue 131 | type OutQT = Either Message (PieceNum, Block) 132 | 133 | filterAllPiece :: OutQT -> Bool 134 | filterAllPiece (Right _) = True 135 | filterAllPiece (Left _) = False 136 | 137 | pickPiece :: PieceNum -> Block -> OutQT -> Bool 138 | pickPiece n blk (Right (n1, blk1)) | n == n1 && blk == blk1 = True 139 | pickPiece _ _ _ = False 140 | 141 | filterPiece :: PieceNum -> Block -> OutQT -> Bool 142 | filterPiece n blk (Right (n1, blk1)) | n == n1 && blk == blk1 = False 143 | | otherwise = True 144 | filterPiece _ _ _ = True 145 | 146 | pickRequest :: PieceNum -> Block -> OutQT -> Bool 147 | pickRequest n blk (Left (Request n1 blk1)) | n == n1 && blk == blk1 = True 148 | pickRequest _ _ _ = False 149 | 150 | modifyQ :: (Q.Queue (OutQT) -> 151 | Q.Queue (OutQT)) 152 | -> Process CF ST () 153 | modifyQ f = modify (\s -> s { outQueue = f $! outQueue s }) 154 | 155 | partitionQ :: (OutQT -> Bool) -> Process CF ST [OutQT] 156 | partitionQ p = do 157 | s <- get 158 | let (as, nq) = Q.partition p $ outQueue s 159 | put $! 
s { outQueue = nq } 160 | return as 161 | 162 | -- | Read a block from the filesystem for sending 163 | readBlock :: PieceNum -> Block -> Process CF ST B.ByteString 164 | readBlock pn blk = do 165 | v <- asks readBlockTV 166 | fch <- asks fsCh 167 | liftIO $ do 168 | atomically $ writeTChan fch (ReadBlock pn blk v) 169 | atomically $ takeTMVar v 170 | -------------------------------------------------------------------------------- /src/Process/PeerMgr.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, TupleSections #-} 2 | module Process.PeerMgr ( 3 | -- * Types 4 | Peer(..) 5 | , PeerMgrMsg(..) 6 | , PeerMgrChannel 7 | , TorrentLocal(..) 8 | -- * Interface 9 | , Process.PeerMgr.start 10 | ) 11 | where 12 | 13 | #if __GLASGOW_HASKELL__ <= 708 14 | import AdaptGhcVersion 15 | #endif 16 | 17 | import Control.Concurrent 18 | import Control.Concurrent.STM 19 | import Control.DeepSeq 20 | 21 | import Control.Monad.State 22 | import Control.Monad.Reader 23 | 24 | import Data.Array 25 | import qualified Data.Map as M 26 | 27 | import qualified Network.Socket as Sock 28 | import System.Log.Logger 29 | 30 | import Channels 31 | import Process 32 | import Process.Peer as Peer 33 | import Process.ChokeMgr hiding (start) 34 | import Process.FS hiding (start) 35 | import Process.PieceMgr hiding (start) 36 | import Process.Status hiding (start) 37 | import Protocol.Wire 38 | 39 | import Supervisor 40 | import Torrent hiding (infoHash) 41 | 42 | data PeerMgrMsg = PeersFromTracker InfoHash [Peer] 43 | | NewIncoming (Sock.Socket, Sock.SockAddr) 44 | | NewTorrent InfoHash TorrentLocal 45 | | StopTorrent InfoHash 46 | 47 | data TorrentLocal = TorrentLocal 48 | { tcPcMgrCh :: !PieceMgrChannel 49 | , tcFSCh :: !FSPChannel 50 | , tcStatTV :: !(TVar [PStat]) 51 | , tcPM :: !PieceMap 52 | } 53 | 54 | #if __GLASGOW_HASKELL__ < 710 55 | instance NFData ThreadId where 56 | rnf x = x `seq` () 57 | #endif 58 | 59 | type PeerMgrChannel = TChan PeerMgrMsg 60 | 61 | data CF = CF { peerCh :: PeerMgrChannel 62 | , mgrCh :: MgrChannel 63 | , peerPool :: SupervisorChannel 64 | , chokeMgrCh :: ChokeMgrChannel 65 | , chokeRTV :: RateTVar 66 | } 67 | 68 | instance Logging CF where 69 | logName _ = "Process.PeerMgr" 70 | 71 | 72 | type ChanManageMap = M.Map InfoHash TorrentLocal 73 | 74 | data ST = ST { peersInQueue :: ![(InfoHash, Peer)] 75 | , peers :: !(M.Map ThreadId PeerChannel) 76 | , peerId :: !PeerId 77 | , cmMap :: !ChanManageMap 78 | } 79 | 80 | start :: PeerMgrChannel -> PeerId 81 | -> ChokeMgrChannel -> RateTVar -> SupervisorChannel 82 | -> IO ThreadId 83 | start ch pid chokeMgrC rtv supC = 84 | do mgrC <- newTChanIO 85 | fakeChan <- newTChanIO 86 | pool <- liftM snd $ oneForOne "PeerPool" [] fakeChan 87 | spawnP (CF ch mgrC pool chokeMgrC rtv) 88 | (ST [] M.empty pid cmap) ({-# SCC "PeerMgr" #-} catchP lp 89 | (defaultStopHandler supC)) 90 | where 91 | cmap = M.empty 92 | lp = do 93 | pc <- asks peerCh 94 | mc <- asks mgrCh 95 | q <- liftIO . atomically $ 96 | (readTChan pc >>= return . Left) `orElse` 97 | (readTChan mc >>= return . 
Right) 98 | case q of 99 | Left msg -> incomingPeers msg 100 | Right msg -> peerEvent msg 101 | fillPeers 102 | lp 103 | 104 | incomingPeers :: PeerMgrMsg -> Process CF ST () 105 | incomingPeers msg = 106 | case msg of 107 | PeersFromTracker ih ps -> do 108 | debugP "Adding peers to queue" 109 | modify (\s -> s { peersInQueue = (map (ih,) ps) ++ peersInQueue s }) 110 | NewIncoming conn@(s, _) -> do 111 | sz <- liftM M.size $ gets peers 112 | if sz < numPeers 113 | then do debugP "New incoming peer, handling" 114 | _ <- addIncoming conn 115 | return () 116 | else do debugP "Already too many peers, closing!" 117 | liftIO $ Sock.close s 118 | NewTorrent ih tl -> do 119 | modify (\s -> s { cmMap = M.insert ih tl (cmMap s)}) 120 | StopTorrent _ih -> do 121 | errorP "Not implemented stopping yet" 122 | 123 | peerEvent :: MgrMessage -> Process CF ST () 124 | peerEvent msg = case msg of 125 | Connect ih tid c -> newPeer ih tid c 126 | Disconnect tid -> removePeer tid 127 | where 128 | newPeer ih tid c = do debugP $ "Adding new peer " ++ show tid 129 | cch <- asks chokeMgrCh 130 | liftIO . atomically $ writeTChan cch (AddPeer ih tid c) 131 | npeers <- M.insert tid c <$> gets peers 132 | npeers `deepseq` modify (\s -> s { peers = npeers }) 133 | removePeer tid = do debugP $ "Removing peer " ++ show tid 134 | cch <- asks chokeMgrCh 135 | liftIO . atomically $ writeTChan cch (RemovePeer tid) 136 | npeers <- M.delete tid <$> gets peers 137 | npeers `deepseq` modify (\s -> s { peers = npeers }) 138 | 139 | numPeers :: Int 140 | numPeers = 40 141 | 142 | fillPeers :: Process CF ST () 143 | fillPeers = do 144 | sz <- liftM M.size $ gets peers 145 | when (sz < numPeers) 146 | (do q <- gets peersInQueue 147 | let (toAdd, rest) = splitAt (numPeers - sz) q 148 | debugP $ "Filling with up to " ++ show (numPeers - sz) ++ " peers" 149 | mapM_ addPeer toAdd 150 | modify (\s -> s { peersInQueue = rest })) 151 | 152 | addPeer :: (InfoHash, Peer) -> Process CF ST ThreadId 153 | addPeer (ih, (Peer addr)) = do 154 | ppid <- gets peerId 155 | pool <- asks peerPool 156 | mgrC <- asks mgrCh 157 | cm <- gets cmMap 158 | v <- asks chokeRTV 159 | liftIO $ connect (addr, ppid, ih) pool mgrC v cm 160 | 161 | addIncoming :: (Sock.Socket, Sock.SockAddr) -> Process CF ST ThreadId 162 | addIncoming conn = do 163 | ppid <- gets peerId 164 | pool <- asks peerPool 165 | mgrC <- asks mgrCh 166 | v <- asks chokeRTV 167 | cm <- gets cmMap 168 | liftIO $ acceptor conn pool ppid mgrC v cm 169 | 170 | type ConnectRecord = (Sock.SockAddr, PeerId, InfoHash) 171 | 172 | connect :: ConnectRecord -> SupervisorChannel -> MgrChannel -> RateTVar -> ChanManageMap 173 | -> IO ThreadId 174 | connect (addr, pid, ih) pool mgrC rtv cmap = 175 | forkIO (connector >> return ()) 176 | where 177 | connector = {-# SCC "connect" #-} 178 | do sock <- Sock.socket Sock.AF_INET Sock.Stream Sock.defaultProtocol 179 | debugM "Process.PeerMgr.connect" $ "Connecting to: " ++ show addr 180 | Sock.connect sock addr 181 | debugM "Process.PeerMgr.connect" "Connected, initiating handShake" 182 | r <- initiateHandshake sock pid ih 183 | debugM "Process.PeerMgr.connect" "Handshake run" 184 | case r of 185 | Left err -> do debugM "Process.PeerMgr.connect" 186 | ("Peer handshake failure at host " ++ show addr 187 | ++ " with error " ++ err) 188 | return () 189 | Right (caps, _rpid, ihsh) -> 190 | do debugM "Process.PeerMgr.connect" "entering peerP loop code" 191 | let tc = case M.lookup ihsh cmap of 192 | Nothing -> error "Impossible (2), I hope" 193 | Just x -> x 194 | 
children <- Peer.start sock caps mgrC rtv 195 | (tcPcMgrCh tc) (tcFSCh tc) (tcStatTV tc) 196 | (tcPM tc) (succ . snd . bounds $ tcPM tc) 197 | ihsh 198 | atomically $ writeTChan pool $ 199 | SpawnNew (Supervisor $ allForOne "PeerSup" children) 200 | return () 201 | 202 | acceptor :: (Sock.Socket, Sock.SockAddr) -> SupervisorChannel 203 | -> PeerId -> MgrChannel -> RateTVar -> ChanManageMap 204 | -> IO ThreadId 205 | acceptor (s,sa) pool pid mgrC rtv cmmap = 206 | forkIO (connector >> return ()) 207 | where ihTst k = M.member k cmmap 208 | connector = {-# SCC "acceptor" #-} do 209 | debugLog "Handling incoming connection" 210 | r <- receiveHandshake s pid ihTst 211 | debugLog "RecvHandshake run" 212 | case r of 213 | Left err -> do debugLog ("Incoming Peer handshake failure with " 214 | ++ show sa ++ ", error: " ++ err) 215 | return() 216 | Right (caps, _rpid, ih) -> 217 | do debugLog "entering peerP loop code" 218 | let tc = case M.lookup ih cmmap of 219 | Nothing -> error "Impossible, I hope" 220 | Just x -> x 221 | children <- Peer.start s caps mgrC rtv (tcPcMgrCh tc) (tcFSCh tc) 222 | (tcStatTV tc) (tcPM tc) 223 | (succ . snd . bounds $ tcPM tc) ih 224 | atomically $ writeTChan pool $ 225 | SpawnNew (Supervisor $ allForOne "PeerSup" children) 226 | return () 227 | debugLog = debugM "Process.PeerMgr.acceptor" 228 | 229 | -------------------------------------------------------------------------------- /src/FS.hs: -------------------------------------------------------------------------------- 1 | -- | Filesystem routines. These are used for working with and 2 | -- manipulating files in the filesystem. 3 | module FS (PieceInfo(..), 4 | PieceMap, 5 | Handles, 6 | readPiece, 7 | readBlock, 8 | writeBlock, 9 | mkPieceMap, 10 | checkFile, 11 | checkPiece, 12 | openAndCheckFile, 13 | canSeed) 14 | where 15 | 16 | import Control.Monad.State 17 | 18 | import Data.Array 19 | import qualified Data.ByteString.Char8 as B 20 | import qualified Data.ByteString.Lazy as L 21 | import qualified Data.Map as M 22 | 23 | import System.IO 24 | import System.Directory (createDirectoryIfMissing) 25 | import System.FilePath (joinPath) 26 | 27 | import Protocol.BCode as BCode 28 | import qualified Digest as D 29 | import Torrent 30 | 31 | -- | For multi-file torrents we've got to maintain multiple file 32 | -- handles. The data structure may as well be a Map Range Handle, 33 | -- but that's detailto only @projectHandles@. More importantly, 34 | -- functions operating on the files must be aware that a 35 | -- piece/block can span multiple files. 36 | -- 37 | -- FIXME: Replace this with a handle cache later. Many peers & many 38 | -- tiny files will make us overstep the fd limit (usually 39 | -- 1024). 
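-- Editor's note: a worked example of the projection below, with
-- hypothetical handles and lengths. Given
--
--   hs = Handles [(h1, 100), (h2, 50), (h3, 200)]
--
-- a read of 120 bytes starting at torrent offset 80 covers the tail of the
-- first file, all of the second and the head of the third:
--
--   projectHandles hs 80 120 == [(h1, 80, 20), (h2, 0, 50), (h3, 0, 50)]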
40 | newtype Handles = Handles [(Handle, Integer)] -- ^[(fileHandle, fileLength)] 41 | 42 | projectHandles :: Handles 43 | -> Integer -- ^Torrent offset 44 | -> Integer -- ^Torrent size 45 | -> [(Handle 46 | ,Integer 47 | ,Integer 48 | )] -- ^ (File handle, file chunk offset, file chunk size) 49 | {- 50 | projectHandles handles offset size = let r = projectHandles' handles offset size 51 | in trace ("projectHandles " ++ 52 | show handles ++ " " ++ 53 | show offset ++ " " ++ 54 | show size ++ " = " ++ 55 | show r 56 | ) $ 57 | r 58 | -} 59 | projectHandles (Handles handles@((h1, length1):handles')) offs size 60 | | size <= 0 = 61 | fail "FS: Should have already stopped projection" 62 | | null handles = 63 | fail "FS: Attempt to read beyond torrent length" 64 | | offs >= length1 = 65 | projectHandles (Handles handles') (offs - length1) size 66 | | otherwise = 67 | let size1 = length1 - offs -- How much of h1 to take? 68 | in if size1 >= size 69 | then [(h1, offs, size)] 70 | else (h1, offs, size1) : 71 | projectHandles (Handles handles') 0 (size - size1) 72 | projectHandles (Handles []) _ _ = fail "FS: Empty Handles list, can't happen" 73 | 74 | pInfoLookup :: PieceNum -> PieceMap -> IO PieceInfo 75 | pInfoLookup pn mp = return $ mp ! pn 76 | 77 | -- | FIXME: minor code duplication with @readBlock@ 78 | readPiece :: PieceNum -> Handles -> PieceMap -> IO L.ByteString 79 | readPiece pn handles mp = 80 | {-# SCC "readPiece" #-} 81 | do pInfo <- pInfoLookup pn mp 82 | bs <- L.concat `fmap` 83 | forM (projectHandles handles (offset pInfo) (len pInfo)) 84 | (\(h, offs, size) -> 85 | do hSeek h AbsoluteSeek offs 86 | L.hGet h (fromInteger size) 87 | ) 88 | if L.length bs == (fromInteger . len $ pInfo) 89 | then return bs 90 | else fail "FS: Wrong number of bytes read" 91 | 92 | -- | FIXME: concatenating strict ByteStrings may turn out 93 | -- expensive. Returning lazy ones may be more appropriate. 94 | readBlock :: PieceNum -> Block -> Handles -> PieceMap -> IO B.ByteString 95 | readBlock pn blk handles mp = 96 | {-# SCC "readBlock" #-} 97 | do pInfo <- pInfoLookup pn mp 98 | B.concat `fmap` 99 | forM (projectHandles handles (offset pInfo + (fromIntegral $ blockOffset blk)) 100 | (fromIntegral $ blockSize blk)) 101 | (\(h, offs, size) -> 102 | do hSeek h AbsoluteSeek offs 103 | B.hGet h $ fromInteger size 104 | ) 105 | 106 | -- | The call @writeBlock h n blk pm blkData@ will write the contents of @blkData@ 107 | -- to the file pointed to by handle at the correct position in the file. If the 108 | -- block is of a wrong length, the call will fail. 
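-- Editor's note: 'blockOffset blk' is relative to the start of the piece,
-- while 'offset pInfo' is the piece's offset within the concatenated
-- torrent data; 'position' below adds the two before handing the write to
-- 'projectHandles'.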
109 | writeBlock :: Handles -> PieceNum -> Block -> PieceMap -> B.ByteString -> IO () 110 | writeBlock handles n blk pm blkData = 111 | {-# SCC "writeBlock" #-} 112 | do when lenFail $ fail "Writing block of wrong length" 113 | pInfo <- pInfoLookup n pm 114 | foldM_ (\content (h, offs, size) -> 115 | do let size' = fromInteger size 116 | (toStore, rest) = B.splitAt size' content 117 | hSeek h AbsoluteSeek offs 118 | B.hPut h $ toStore 119 | hFlush h 120 | return rest 121 | ) blkData (projection (position pInfo) (fromIntegral $ B.length blkData)) 122 | return () 123 | where 124 | projection = {-# SCC "projectHandles" #-} projectHandles handles 125 | position :: PieceInfo -> Integer 126 | position pinfo = (offset pinfo) + fromIntegral (blockOffset blk) 127 | lenFail = B.length blkData /= blockSize blk 128 | 129 | -- | The @checkPiece h inf@ checks the file system for correctness of a given piece, namely if 130 | -- the piece described by @inf@ is correct inside the file pointed to by @h@. 131 | checkPiece :: PieceInfo -> Handles -> IO Bool 132 | checkPiece inf handles = {-# SCC "checkPiece" #-} do 133 | bs <- L.concat `fmap` 134 | forM (projectHandles handles (offset inf) (fromInteger $ len inf)) 135 | (\(h, offs, size) -> 136 | do hSeek h AbsoluteSeek offs 137 | L.hGet h (fromInteger size) 138 | ) 139 | return (D.digest bs == digest inf) 140 | 141 | -- | Create a MissingMap from a file handle and a piecemap. The system will read each part of 142 | -- the file and then check it against the digest. It will create a map of what we are missing 143 | -- in the file as a missing map. We could alternatively choose a list of pieces missing rather 144 | -- then creating the data structure here. This is perhaps better in the long run. 145 | checkFile :: Handles -> PieceMap -> IO PiecesDoneMap 146 | checkFile handles pm = do l <- mapM checkP pieces 147 | return $ M.fromList l 148 | where pieces = assocs pm 149 | checkP :: (PieceNum, PieceInfo) -> IO (PieceNum, Bool) 150 | checkP (pn, pInfo) = do b <- checkPiece pInfo handles 151 | return (pn, b) 152 | 153 | -- | Extract the PieceMap from a bcoded structure 154 | -- Needs some more defense in the long run. 155 | mkPieceMap :: BCode -> Maybe PieceMap 156 | mkPieceMap bc = fetchData 157 | where fetchData = do pLen <- infoPieceLength bc 158 | pieceData <- infoPieces bc 159 | tLen <- infoLength bc 160 | let pis = extract pLen tLen 0 pieceData 161 | l = length pis 162 | pm = array (0, l-1) (zip [0..] pis) 163 | when ( tLen /= (sum $ map len $ elems pm) ) 164 | (error "PieceMap construction size assertion failed") 165 | return pm 166 | extract :: Integer -> Integer -> Integer -> [B.ByteString] -> [PieceInfo] 167 | extract _ 0 _ [] = [] 168 | extract plen tlen offst (p : ps) | tlen < plen = PieceInfo { 169 | offset = offst, 170 | len = tlen, 171 | digest = p } : extract plen 0 (offst + plen) ps 172 | | otherwise = inf : extract plen (tlen - plen) (offst + plen) ps 173 | where inf = PieceInfo { offset = offst, 174 | len = plen, 175 | digest = p } 176 | extract _ _ _ _ = error "mkPieceMap: the impossible happened!" 177 | 178 | -- | Predicate function. True if nothing is missing from the map. 179 | canSeed :: PiecesDoneMap -> Bool 180 | canSeed = M.fold (&&) True 181 | 182 | -- | Process a BCoded torrent file. 
Create directories, open the files 183 | -- in question, check it and return Handles plus a haveMap for the 184 | -- file 185 | openAndCheckFile :: BCode -> IO (Handles, PiecesDoneMap, PieceMap) 186 | openAndCheckFile bc = 187 | do 188 | handles <- Handles `fmap` 189 | forM files 190 | (\(path, l) -> 191 | do let dir = joinPath $ init path 192 | when (dir /= "") $ 193 | createDirectoryIfMissing True dir 194 | let fpath = joinPath path 195 | h <- openBinaryFile fpath ReadWriteMode 196 | return (h, l) 197 | ) 198 | have <- checkFile handles pieceMap 199 | return (handles, have, pieceMap) 200 | where Just files = BCode.infoFiles bc 201 | Just pieceMap = mkPieceMap bc 202 | -------------------------------------------------------------------------------- /doc/piecemanager.lhs: -------------------------------------------------------------------------------- 1 | On the idioms used in haskell-torrent 2 | ===================================== 3 | 4 | This post serves several purposes. First, we bring an update on the 5 | code state: The client is now able to leech, that is, download, in a 6 | lab setting. The lab consists of a test tracker, an rtorrent client and two 7 | computers. The testfile is a file of 5000000 bytes containing a specific 8 | phrase over and over. This gives a good compression ratio in git and 9 | thus it is not that irritating to move over the wire if needed. 10 | 11 | Alex Mason did a good job improving several things in the project. He 12 | added prioritization to logging, got compilation working on GHC 6.12.x 13 | and is now looking at improving every aspect of data parsing. The 14 | BCode work, able to handle the bittorrent bencoded data using 15 | applicative functors (**Control.Applicative**), looks really good. 16 | 17 | The latter part serves two purposes in parallel: It describes the 18 | idioms used and it describes the piece manager code used for leeching 19 | torrents. This is a work in progress and there are some things we 20 | don't handle yet. 21 | 22 | Also, I am trying to use John MacFarlane’s excellent 23 | [pandoc](http://johnmacfarlane.net/pandoc/) package for typesetting 24 | this in blogger. 25 | 26 | The Piecemanager 27 | ---------------- 28 | 29 | The Piece Manager is defined in the module **PieceMgrP**. It is 30 | responsible for keeping track of the torrent's internal Piece 31 | State. That is, what do we need to download, and what have we already 32 | downloaded and can serve to other peers. Remember that in our 33 | terminology, a Block is a subset of a piece, given by an offset and a 34 | length. You could call it a *slice* of a piece. 35 | 36 | The basic idiom of the piecemanager is that of an owning process. In 37 | traditional semaphore-oriented programming we protect a data structure 38 | by a mutex. We could also protect it with software transactions, but since 39 | we partially derived haskell-torrent from etorrent, we'll go with the 40 | message passing method. We simply invent a process to manage the data 41 | structure. Operations on the structure are then serialized by passing 42 | them to the process and getting answers back, RPC-style. It might not 43 | be parallel, but it certainly is easy. 44 | 45 | The data structure we want to protect is the piece database: 46 | 47 | > data PieceDB = PieceDB 48 | > { pendingPieces :: [PieceNum] 49 | > , donePiece :: [PieceNum] 50 | > , inProgress :: M.Map PieceNum InProgressPiece 51 | > , infoMap :: PieceMap 52 | > } 53 | 54 | The database contains a list of pieces which are pending for 55 | download.
This list should, in a perfect world, be maintained with a 56 | histogram such that we know the rarity of each piece. A *good* client 57 | prefers rare pieces to young pieces and selects randomly among pieces 58 | of the same rarity. A weaker client picks randomly without taking 59 | their rarity into consideration. A dumb client, like haskell-torrent 60 | currently, just picks them off from a single end. This is bad for the 61 | torrent cloud to do, but it is a start. If someone comes up with a 62 | data structure which is (practically) efficient for storing 63 | histograms, I would be happy to hear about it. 64 | 65 | The *donePiece* record field is the list of pieces that are done. We 66 | keep this around because when a new peer is connected to the client 67 | then we need to tell this peer about what pieces we have fully 68 | downloaded. 69 | 70 | Then we have **Data.Map** **Map** which tells us something about 71 | Pieces that are in progress. The InProgress data type has the 72 | following structure: 73 | 74 | > data InProgressPiece = InProgressPiece 75 | > { ipDone :: Int 76 | > , ipSize :: Int 77 | > , ipHaveBlocks :: S.Set Block 78 | > , ipPendingBlocks :: [Block] 79 | > } deriving Show 80 | 81 | These fields are (in order) how many blocks in the piece we have 82 | downloaded, the complete size of the piece, a set of the blocks we 83 | have downloaded and a list of blocks pending download. The size of the 84 | piece is almost always the same, but the last piece is different if 85 | the complete file is not a multiple of the block size. 86 | 87 | Returning to the *PieceDB*, the last entry describes the complete 88 | torrent. The PieceMap tells, for each piece, its offset in the file, 89 | its length and its SHA1 digest. Note we do not support multi-file 90 | torrents yet, although this code would probably be unaltered. The 91 | offset is in the multi-file-case the offset in the concatenation of 92 | the files in the torrent. 93 | 94 | ## Starting the process 95 | 96 | The PieceManager process is started with the *start* call: 97 | 98 | > start :: LogChannel -> PieceMgrChannel -> FSPChannel -> PieceDB -> IO () 99 | > start logC mgrC fspC db = (spawn $ lp db) >> return () 100 | > where lp db = do 101 | > msg <- sync $ receive mgrC (const True) 102 | > case msg of 103 | 104 | We supply a number of **CML**-channels to the process from the outside 105 | and then we spawn off the main loop before returning (). This is 106 | probably not good in the long run, where we will need to handle errors 107 | in the process. But for now we accept that the code is supposedly 108 | error-free and never have any problems. 109 | 110 | The loop code itself synchronizes on messages here named *msg*. These 111 | messages have the form 112 | 113 | > data PieceMgrMsg = GrabBlocks Int [PieceNum] (Channel [(PieceNum, [Block])]) 114 | > | StoreBlock PieceNum Block B.ByteString 115 | > | PutbackBlocks [(PieceNum, Block)] 116 | > | GetDone (Channel [PieceNum]) 117 | 118 | and each of these are going to be explained in the following. A 119 | general note is that if the message contains a channel, it is usually 120 | a form of RPC-message, where the channel is the return channel on 121 | which to send back an answer. One could probably factor this out into 122 | a generic RPC-construction with a bit of cleverness, but I have not 123 | given it much thought yet. 124 | 125 | In the following, we present some examples of processing these messages. 
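As an aside, the generic RPC-construction hinted at above could look roughly like this (an untested sketch; it only assumes the CML primitives *channel*, *sync*, *transmit* and *receive* already used throughout the client):

> rpc :: Channel a -> (Channel b -> a) -> IO b
> rpc reqC mkMsg = do
>     replyC <- channel
>     sync $ transmit reqC (mkMsg replyC)
>     sync $ receive replyC (const True)

With such a helper, the *GetDone* round-trip shown under *The RPC idiom* below would shrink to *rpc ch GetDone*.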
126 | 127 | ### Grabbing blocks 128 | 129 | Here is the fragment for grabbing blocks. It is one of the paths in 130 | the diagram above. 131 | 132 | > GrabBlocks n eligible c -> 133 | > do logMsg logC $ "Grabbing blocks" 134 | > let (blocks, db') = grabBlocks' n eligible db 135 | > logMsg logC $ "Grabbed..." 136 | > sync $ transmit c blocks 137 | > lp db' 138 | 139 | Basically, this call is a request by a peer process to get exclusive 140 | access to *n* blocks among all the available blocks for a while. This 141 | ensures that only this peer will download the blocks, eliminating 142 | block collisions. The *eligible* value is a list of pieces that the 143 | peer has already downloaded, and we should of course only choose 144 | blocks among those. Our block grabber may honor the request or return 145 | an empty list if no block can be satisfied. 146 | 147 | The guts is hidden in the call to the helper *grabBlocks'*, which we 148 | will describe later. 149 | 150 | ### Storing blocks 151 | 152 | The other path in the diagram is maintained by this code fragment: 153 | 154 | > StoreBlock pn blk d -> 155 | > do FSP.storeBlock fspC pn blk d 156 | > let (done, db') = updateProgress db pn blk 157 | > if done 158 | > then do assertPieceComplete db pn logC 159 | > pieceOk <- FSP.checkPiece fspC pn 160 | > let db'' = 161 | > case pieceOk of 162 | > Nothing -> 163 | > error "PieceMgrP: Piece Nonexisting!" 164 | > Just True -> completePiece db' pn 165 | > Just False -> putBackPiece db' pn 166 | > lp db'' 167 | > else lp db' 168 | 169 | We get a request to store the block *blk* in piece *pn* where *d* is 170 | the data we want to store as a **ByteString**. We invoke the 171 | filesystem to actually store the block. In a real world, we would 172 | check that this is really what we wanted. If the piece is already 173 | complete and checked, we don't want a stray block to errornously 174 | destroy the piece. In general we want more checks like these in the 175 | client. 176 | 177 | Then we update the database with the progress on the piece. If the 178 | piece is done, we invoke a checking of that piece. Either it is 179 | complete and Ok, in which case we mark it as such in the database -- 180 | or it is not Ok, in which case we put it back for downloading. This 181 | happens in the real world at times due to clients with bugs so it is 182 | always important not to trust the other client. If the piece 183 | completes, we should send out HAVE messages to all connected peers. I 184 | plan to make the Peer Manager do that, but the code has not yet been 185 | implemented for that. 186 | 187 | ### The RPC idiom 188 | 189 | When we initially connect to a peer, we will have to transfer the 190 | pieces we have. To do this, we construct a BITFIELD message and to 191 | construct this, we need the set of pieces which are complete. The 192 | *GetDone* message handles this: 193 | 194 | > GetDone c -> do sync $ transmit c (donePiece db) 195 | > lp db 196 | 197 | and the peer which want this calls the function 198 | 199 | > getPieceDone :: PieceMgrChannel -> IO [PieceNum] 200 | > getPieceDone ch = do 201 | > c <- channel 202 | > sync $ transmit ch $ GetDone c 203 | > sync $ receive c (const True) 204 | 205 | I think there is an idiom to be extracted from this code and as it is 206 | used quite a lot it would be very beneficial to have in the long run. 207 | 208 | ## Grabbing blocks, the inner workings 209 | 210 | The diagram above hints that grabbing blocks has a more complicated 211 | control flow. 
The idea of the code is that the control flow is modeled 212 | by a tail call into the next box. Let us break up the code: 213 | 214 | > grabBlocks' :: Int -> [PieceNum] -> PieceDB 215 | > -> ([(PieceNum, [Block])], PieceDB) 216 | > grabBlocks' k eligible db = tryGrabProgress k eligible db [] 217 | > where 218 | 219 | So we will try to grab *k* blocks from the *eligible* pieces from 220 | *db*. The empty list is the accumulator in which we add the blocks we 221 | found as we go. 222 | 223 | > tryGrabProgress 0 _ db captured = (captured, db) -- Enough, exit 224 | > tryGrabProgress n ps db captured = 225 | > case ps `intersect` (fmap fst $ M.toList (inProgress db)) of 226 | > [] -> tryGrabPending n ps db captured 227 | > (h:_) -> grabFromProgress n ps h db captured 228 | 229 | Grabbing from the pieces already in progress minimizes the number of 230 | "open" pieces. We want to minimize this as a complete piece can be 231 | checked for correctness. Correct pieces can then be shared to others 232 | and since our download speed is dependent on our upload speed, 233 | complete pieces is key. Finding a piece is simply to carry out set 234 | intersection. If you've read to this point, there is a refactoring 235 | opportunity by using *M.keys*. 236 | 237 | A smarter client, as hinted, would order the eligible pieces such that 238 | the rarest pieces were tried first. One could also toy with the idea 239 | of *pruning*: tell the peer what pieces we already have downloaded so 240 | it won't include them in further requests. This would keep the *ps* 241 | parameter small in size. 242 | 243 | There are three exit-points. Either there was a piece, *h*, we can 244 | grab from that one, or there were none among the pieces in progress: 245 | we will then seek the list of pending pieces. Finally, if we are 246 | requested to grab 0 blocks, we already have enough blocks and can 247 | return those we have together with the new database. 248 | 249 | > grabFromProgress n ps p db captured = 250 | > let ipp = fromJust $ M.lookup p (inProgress db) 251 | > (grabbed, rest) = splitAt n (ipPendingBlocks ipp) 252 | > nIpp = ipp { ipPendingBlocks = rest } 253 | > nDb = db { inProgress = M.insert p nIpp (inProgress db) } 254 | > in 255 | > if grabbed == [] 256 | > then tryGrabProgress n (ps \\ [p]) db captured 257 | > else tryGrabProgress (n - length grabbed) ps nDb ((p, grabbed) : captured) 258 | 259 | Here, we have found the piece *p* as being in progress. So we find its 260 | *InProgress* structure, and use **Data.List.splitAt** to cut off as 261 | many blocks as possible. We may find that we can't grab any 262 | blocks. This happens when we or other peers are already downloading 263 | all pieces. We then prune the piece *p* from the set of eligible 264 | pieces and try again. Otherwise, we update the database and the number 265 | of pieces to grab and go back to start. Another hint: We should 266 | probably prune *p* from *ps* here as well :) 267 | 268 | > tryGrabPending n ps db captured = 269 | > case ps `intersect` (pendingPieces db) of 270 | > [] -> (captured, db) -- No (more) pieces to download, return 271 | > (h:_) -> 272 | > let blockList = createBlock h db 273 | > ipp = InProgressPiece 0 bSz S.empty blockList 274 | > bSz = len $ fromJust $ M.lookup n (infoMap db) 275 | > nDb = db { pendingPieces = (pendingPieces db) \\ [h], 276 | > inProgress = M.insert h ipp (inProgress db) } 277 | > in tryGrabProgress n ps nDb captured 278 | 279 | Finally, this part grabs from the pending pieces. 
Either we can't find 280 | a piece and then we can just exit. In the other case we have found a 281 | piece. We then create the blocks of the piece and insert it into the 282 | pieces in progress. The tail-call to *tryGrabProgress* will then find 283 | it. 284 | 285 | I hope the similarity to the diagram is clear. When building these 286 | tail-call mazes I usually start with the diagram sketch. Then I 287 | hand-write them in Graphviz's dot-notation and create them. Finally I 288 | write the code. -------------------------------------------------------------------------------- /src/Protocol/BCode.hs: -------------------------------------------------------------------------------- 1 | -- | Add a module description here 2 | -- also add descriptions to each function. 3 | module Protocol.BCode 4 | ( 5 | BCode, 6 | Path(..), 7 | encode, 8 | -- encodeBS, 9 | decode, 10 | search, 11 | announce, 12 | comment, 13 | creationDate, 14 | announceList, 15 | info, 16 | hashInfoDict, 17 | infoLength, 18 | infoName, 19 | infoPieceLength, 20 | infoPieces, 21 | numberPieces, 22 | infoFiles, 23 | prettyPrint, 24 | 25 | trackerComplete, 26 | trackerIncomplete, 27 | trackerInterval, 28 | trackerMinInterval, 29 | trackerPeers, 30 | trackerWarning, 31 | trackerError, 32 | toBS, 33 | fromBS, 34 | -- Extended handshake 35 | extendedP, 36 | extendedV, 37 | extendedRReq, 38 | extendedMsg, 39 | --Tests 40 | testSuite) 41 | where 42 | 43 | import Control.Monad 44 | import Control.Applicative hiding (many) 45 | import qualified Data.ByteString.Lazy as L 46 | import qualified Data.ByteString as B 47 | 48 | import qualified Data.Text as T 49 | import qualified Data.Text.Encoding as T 50 | 51 | import Data.Char 52 | 53 | import Data.List 54 | import qualified Data.Map as M 55 | import Text.PrettyPrint.HughesPJ hiding (char) 56 | 57 | import Data.Serialize 58 | import Data.Word 59 | 60 | import Test.QuickCheck 61 | import Test.Framework 62 | import Test.Framework.Providers.QuickCheck2 63 | import Test.Framework.Providers.HUnit 64 | import Test.HUnit hiding (Path, Test) 65 | 66 | import Digest 67 | import TestInstance() -- for instances only 68 | 69 | -- | BCode represents the structure of a bencoded file 70 | data BCode = BInt Integer -- ^ An integer 71 | | BString B.ByteString -- ^ A string of bytes 72 | | BArray [BCode] -- ^ An array 73 | | BDict (M.Map B.ByteString BCode) -- ^ A key, value map 74 | deriving (Show, Eq) 75 | 76 | instance Arbitrary BCode where 77 | arbitrary = sized bc' 78 | where bc' :: Int -> Gen BCode 79 | bc' 0 = oneof [BInt <$> arbitrary, 80 | BString <$> arbitrary] 81 | bc' n = 82 | oneof [BInt <$> arbitrary, 83 | BString <$> arbitrary, 84 | BArray <$> sequence (replicate n $ bc' (n `div` 8)), 85 | do keys <- vectorOf n arbitrary 86 | values <- sequence (replicate n $ bc' (n `div` 8)) 87 | return $ BDict $ M.fromList $ zip keys values] 88 | 89 | data Path = PString B.ByteString 90 | | PInt Int 91 | 92 | toW8 :: Char -> Word8 93 | toW8 = fromIntegral . ord 94 | 95 | fromW8 :: Word8 -> Char 96 | fromW8 = chr . fromIntegral 97 | 98 | toBS :: String -> B.ByteString 99 | toBS = B.pack . map toW8 100 | 101 | fromBS :: B.ByteString -> String 102 | fromBS = map fromW8 . B.unpack 103 | 104 | fromUtf8BS :: B.ByteString -> String 105 | fromUtf8BS = T.unpack . T.decodeUtf8 106 | 107 | 108 | instance Serialize BCode where 109 | put (BInt i) = wrap 'i' 'e' $ putShow i 110 | put (BArray arr) = wrap 'l' 'e' . mapM_ put $ arr 111 | put (BDict mp) = wrap 'd' 'e' dict 112 | where dict = mapM_ encPair . 
M.toList $ mp 113 | encPair (k, v) = put (BString k) >> put v 114 | put (BString s) = do 115 | putShow (B.length s) 116 | putWord8 (toW8 ':') 117 | putByteString s 118 | 119 | get = getBInt <|> getBArray <|> getBDict <|> getBString 120 | 121 | -- | Get something wrapped in two Chars 122 | getWrapped :: Char -> Char -> Get a -> Get a 123 | getWrapped a b p = char a *> p <* char b 124 | 125 | -- | Parses a BInt 126 | getBInt :: Get BCode 127 | getBInt = BInt . read <$> getWrapped 'i' 'e' intP 128 | where intP = ((:) <$> char '-' <*> getDigits) <|> getDigits 129 | 130 | -- | Parses a BArray 131 | getBArray :: Get BCode 132 | getBArray = BArray <$> getWrapped 'l' 'e' (many get) 133 | 134 | 135 | -- | Parses a BDict 136 | getBDict :: Get BCode 137 | getBDict = BDict . M.fromList <$> getWrapped 'd' 'e' (many getPairs) 138 | where getPairs = do 139 | (BString s) <- getBString 140 | x <- get 141 | return (s,x) 142 | 143 | -- | Parses a BString 144 | getBString :: Get BCode 145 | getBString = do 146 | count <- getDigits 147 | BString <$> ( char ':' *> getStr (read count :: Integer)) 148 | where maxInt = fromIntegral (maxBound :: Int) :: Integer 149 | 150 | getStr n | n >= 0 = B.concat <$> (sequence $ getStr' n) 151 | | otherwise = fail $ "read a negative length string, length: " ++ show n 152 | 153 | getStr' n | n > maxInt = getByteString maxBound : getStr' (n-maxInt) 154 | | otherwise = [getByteString . fromIntegral $ n] 155 | 156 | 157 | -- | Get one or more digit characters 158 | getDigits :: Get String 159 | getDigits = many1 digit 160 | 161 | -- | Returns a character if it is a digit, fails otherwise. uses isDigit. 162 | digit :: Get Char 163 | digit = do 164 | x <- getCharG 165 | if isDigit x 166 | then return x 167 | else fail $ "Expected digit, got: " ++ show x 168 | 169 | 170 | -- * Put helper functions 171 | 172 | -- | Put an element, wrapped by two characters 173 | wrap :: Char -> Char -> Put -> Put 174 | wrap a b m = do 175 | putWord8 (toW8 a) 176 | m 177 | putWord8 (toW8 b) 178 | 179 | -- | Put something as it is shown using @show@ 180 | putShow :: Show a => a -> Put 181 | putShow = mapM_ put . show 182 | 183 | -- * Get Helper functions 184 | 185 | -- | Parse zero or items using a given parser 186 | many :: Get a -> Get [a] 187 | many p = many1 p `mplus` return [] 188 | 189 | -- | Parse one or more items using a given parser 190 | many1 :: Get a -> Get [a] 191 | many1 p = (:) <$> p <*> many p 192 | 193 | -- | Parse a given character 194 | char :: Char -> Get Char 195 | char c = do 196 | x <- getCharG 197 | if x == c 198 | then return c 199 | else fail $ "Expected char: '" ++ c:"' got: '" ++ [x,'\''] 200 | 201 | -- | Get a Char. Only works with single byte characters 202 | getCharG :: Get Char 203 | getCharG = fromW8 <$> getWord8 204 | 205 | -- BCode helper functions 206 | 207 | -- | Return the hash of the info-dict in a torrent file 208 | hashInfoDict :: BCode -> IO Digest 209 | hashInfoDict bc = 210 | do ih <- case info bc of 211 | Nothing -> fail "Could not find infoHash" 212 | Just x -> return x 213 | let encoded = encode ih 214 | return $ digest $ L.fromChunks $ [encoded] 215 | 216 | 217 | toPS :: String -> Path 218 | toPS = PString . toBS 219 | 220 | {- Simple search function over BCoded data structures, general case. 
In practice, we 221 | will prefer some simpler mnemonics -} 222 | search :: [Path] -> BCode -> Maybe BCode 223 | search [] bc = Just bc 224 | search (PInt i : rest) (BArray bs) | i < 0 || i > length bs = Nothing 225 | | otherwise = search rest (bs!!i) 226 | search (PString s : rest) (BDict mp) = M.lookup s mp >>= search rest 227 | search _ _ = Nothing 228 | 229 | search' :: String -> BCode -> Maybe B.ByteString 230 | search' str b = case search [toPS str] b of 231 | Nothing -> Nothing 232 | Just (BString s) -> Just s 233 | _ -> Nothing 234 | 235 | searchStr :: String -> BCode -> Maybe B.ByteString 236 | searchStr = search' 237 | 238 | searchInt :: String -> BCode -> Maybe Integer 239 | searchInt str b = case search [toPS str] b of 240 | Just (BInt i) -> Just i 241 | _ -> Nothing 242 | 243 | searchInfo :: String -> BCode -> Maybe BCode 244 | searchInfo str = search [toPS "info", toPS str] 245 | 246 | {- Various accessors -} 247 | announce, comment, creationDate :: BCode -> Maybe B.ByteString 248 | announce = search' "announce" 249 | comment = search' "comment" 250 | creationDate = search' "creation date" 251 | 252 | 253 | -- | list of list of strings, according to official spec 254 | announceList :: BCode -> Maybe [[B.ByteString]] 255 | announceList b = case search [toPS "announce-list"] b of 256 | Just (BArray xs) -> Just ( map (\(BArray s) -> map' s) xs) 257 | _ -> Nothing 258 | where map' = map (\(BString s) -> s) 259 | 260 | 261 | {- Tracker accessors -} 262 | trackerComplete, trackerIncomplete, trackerInterval :: BCode -> Maybe Integer 263 | trackerMinInterval :: BCode -> Maybe Integer 264 | trackerComplete = searchInt "complete" 265 | trackerIncomplete = searchInt "incomplete" 266 | trackerInterval = searchInt "interval" 267 | trackerMinInterval = searchInt "min interval" 268 | 269 | trackerError, trackerWarning :: BCode -> Maybe B.ByteString 270 | trackerError = searchStr "failure reason" 271 | trackerWarning = searchStr "warning mesage" 272 | 273 | trackerPeers :: BCode -> Maybe (B.ByteString, B.ByteString) 274 | trackerPeers bc = do v4 <- searchStr "peers" bc 275 | v6 <- return $ maybe (B.empty) id $ searchStr "peers6" bc 276 | return (v4, v6) 277 | 278 | info :: BCode -> Maybe BCode 279 | info = search [toPS "info"] 280 | 281 | infoName :: BCode -> Maybe B.ByteString 282 | infoName bc = case search [toPS "info", toPS "name"] bc of 283 | Just (BString s) -> Just s 284 | _ -> Nothing 285 | 286 | infoPieceLength ::BCode -> Maybe Integer 287 | infoPieceLength bc = do BInt i <- search [toPS "info", toPS "piece length"] bc 288 | return i 289 | 290 | infoLength :: BCode -> Maybe Integer 291 | infoLength bc = maybe length2 Just length1 292 | where 293 | -- |info/length key for single-file torrent 294 | length1 = do BInt i <- search [toPS "info", toPS "length"] bc 295 | return i 296 | -- |length summed from files of multi-file torrent 297 | length2 = sum `fmap` 298 | map snd `fmap` 299 | infoFiles bc 300 | 301 | infoPieces :: BCode -> Maybe [B.ByteString] 302 | infoPieces b = do t <- searchInfo "pieces" b 303 | case t of 304 | BString str -> return $ sha1Split str 305 | _ -> mzero 306 | where sha1Split r | r == B.empty = [] 307 | | otherwise = block : sha1Split rest 308 | where (block, rest) = B.splitAt 20 r 309 | 310 | numberPieces :: BCode -> Maybe Int 311 | numberPieces = fmap length . 
infoPieces 312 | 313 | infoFiles :: BCode -> Maybe [([String], Integer)] -- ^[(filePath, fileLength)] 314 | infoFiles bc = let mbFpath = fromUtf8BS `fmap` infoName bc 315 | mbLength = infoLength bc 316 | mbFiles = do BArray fileList <- searchInfo "files" bc 317 | return $ do fileDict@(BDict _) <- fileList 318 | let Just (BInt l) = search [toPS "length"] fileDict 319 | Just (BArray pth) = search [toPS "path"] fileDict 320 | pth' = map (\(BString s) -> fromUtf8BS s) pth 321 | return (pth', l) 322 | in case (mbFpath, mbLength, mbFiles) of 323 | (Just fpath, _, Just files) -> 324 | Just $ 325 | map (\(pth, l) -> 326 | (fpath:pth, l) 327 | ) files 328 | (Just fpath, Just l, _) -> 329 | Just [([fpath], l)] 330 | (_, _, Just files) -> 331 | Just files 332 | _ -> 333 | Nothing 334 | 335 | --------------------------------------------------------------------- 336 | -- Extended message handshake 337 | -- 338 | 339 | extendedP :: BCode -> Maybe Word16 340 | extendedP = fmap fromIntegral . searchInt "p" 341 | 342 | extendedV :: BCode -> Maybe String 343 | extendedV = fmap ( fmap (chr . fromIntegral) ) . fmap B.unpack . searchStr "v" 344 | 345 | extendedRReq :: BCode -> Maybe Integer 346 | extendedRReq = searchInt "rreq" 347 | 348 | extendedMsg :: Integer -> String -> Integer -> BCode 349 | extendedMsg p v rreq = BDict $ M.fromList [(toBS "m", BDict M.empty) 350 | ,(toBS "p", BInt p) 351 | ,(toBS "v", BString $ toBS v) 352 | ,(toBS "rreq", BInt rreq)] 353 | 354 | --------------------------------------------------------------------- 355 | -- Pretty printing 356 | -- 357 | 358 | pp :: BCode -> Doc 359 | pp bc = 360 | case bc of 361 | BInt i -> integer i 362 | BString s -> text (show s) 363 | BArray arr -> text "[" <+> (cat $ intersperse comma al) <+> text "]" 364 | where al = map pp arr 365 | BDict mp -> text "{" <+> cat (intersperse comma mpl) <+> text "}" 366 | where mpl = map (\(s, bc') -> text (fromUtf8BS s) <+> text "->" <+> pp bc') $ M.toList mp 367 | 368 | prettyPrint :: BCode -> String 369 | prettyPrint = render . pp 370 | 371 | 372 | toBDict :: [(String,BCode)] -> BCode 373 | toBDict = BDict . M.fromList . map (\(k,v) -> ((toBS k),v)) 374 | 375 | toBString :: String -> BCode 376 | toBString = BString . 
toBS 377 | 378 | 379 | -- TESTS 380 | 381 | 382 | testSuite :: Test 383 | testSuite = testGroup "Protocol/BCode" 384 | [ testProperty "QC encode-decode/id" propEncodeDecodeId, 385 | testCase "HUnit encode-decode/id" testDecodeEncodeProp1 ] 386 | 387 | propEncodeDecodeId :: BCode -> Bool 388 | propEncodeDecodeId bc = 389 | let encoded = encode bc 390 | decoded = decode encoded 391 | in 392 | Right bc == decoded 393 | 394 | testDecodeEncodeProp1 :: Assertion 395 | testDecodeEncodeProp1 = 396 | let encoded = encode testData 397 | decoded = decode encoded 398 | in 399 | assertEqual "for encode/decode identity" (Right testData) decoded 400 | 401 | testData :: [BCode] 402 | testData = [BInt 123, 403 | BInt (-123), 404 | BString (toBS "Hello"), 405 | BString (toBS ['\NUL'..'\255']), 406 | BArray [BInt 1234567890 407 | ,toBString "a longer string with eieldei stuff to mess things up" 408 | ], 409 | toBDict [ 410 | ("hello",BInt 3) 411 | ,("a key",toBString "and a value") 412 | ,("a sub dict",toBDict [ 413 | ("some stuff",BInt 1) 414 | ,("some more stuff", toBString "with a string") 415 | ]) 416 | ] 417 | ] 418 | 419 | -------------------------------------------------------------------------------- /src/Protocol/Wire.hs: -------------------------------------------------------------------------------- 1 | {- 2 | A parser and encoder for the BitTorrent wire protocol using the 3 | cereal package. 4 | -} 5 | 6 | {-# LANGUAGE EmptyDataDecls #-} 7 | 8 | module Protocol.Wire 9 | ( Message(..) 10 | , msgSize 11 | , encodePacket 12 | , decodeMsg 13 | , getMsg 14 | , getAPMsg 15 | , BitField 16 | , constructBitField 17 | -- Handshaking 18 | , initiateHandshake 19 | , receiveHandshake 20 | -- Tests 21 | , testSuite 22 | ) 23 | where 24 | 25 | import Control.Applicative hiding (empty) 26 | import Control.Monad 27 | 28 | import qualified Data.ByteString as B 29 | import qualified Data.ByteString.Lazy as L 30 | 31 | import Data.Attoparsec.ByteString as A 32 | import Data.Bits (testBit, setBit) 33 | 34 | import Data.Serialize 35 | 36 | import Data.Char 37 | import Data.Word 38 | import Data.Maybe (catMaybes) 39 | import Network.Socket hiding (send, sendTo, recv, recvFrom, KeepAlive) 40 | import Network.Socket.ByteString 41 | import qualified Network.Socket.ByteString.Lazy as Lz 42 | 43 | import System.Log.Logger 44 | 45 | import Test.Framework 46 | import Test.Framework.Providers.QuickCheck2 47 | import Test.QuickCheck 48 | 49 | import qualified Protocol.BCode as BCode (BCode, encode) 50 | import Torrent 51 | 52 | ------------------------------------------------------------ 53 | 54 | type BitField = B.ByteString 55 | 56 | data Message = KeepAlive 57 | | Choke 58 | | Unchoke 59 | | Interested 60 | | NotInterested 61 | | Have PieceNum -- Int 62 | | BitField BitField 63 | | Request PieceNum Block 64 | | Piece PieceNum Int B.ByteString 65 | | Cancel PieceNum Block 66 | | Port Integer 67 | | HaveAll 68 | | HaveNone 69 | | Suggest PieceNum 70 | | RejectRequest PieceNum Block 71 | | AllowedFast PieceNum 72 | | ExtendedMsg Word8 B.ByteString 73 | deriving (Eq, Show) 74 | 75 | msgSize :: Message -> Int 76 | msgSize KeepAlive = 0 77 | msgSize Choke = 1 78 | msgSize Unchoke = 1 79 | msgSize Interested = 1 80 | msgSize NotInterested = 1 81 | msgSize (Have _) = 5 82 | msgSize (BitField bf) = B.length bf + 1 83 | msgSize (Request _ _) = 13 84 | msgSize (Piece _ _ bs) = 9 + B.length bs 85 | msgSize (Cancel _ _) = 13 86 | msgSize (Port _) = 3 87 | msgSize HaveAll = 1 88 | msgSize HaveNone = 1 89 | msgSize (Suggest _) = 5 90 | 
msgSize (RejectRequest _ _) = 13 91 | msgSize (AllowedFast _) = 5 92 | msgSize (ExtendedMsg _ bs) = B.length bs + 2 93 | 94 | instance Arbitrary Message where 95 | arbitrary = oneof [return KeepAlive, return Choke, return Unchoke, return Interested, 96 | return NotInterested, return HaveAll, return HaveNone, 97 | Suggest <$> pos, 98 | RejectRequest <$> pos <*> arbitrary, 99 | AllowedFast <$> pos, 100 | Have <$> pos, 101 | BitField <$> arbitrary, 102 | Request <$> pos <*> arbitrary, 103 | Piece <$> pos <*> pos <*> arbitrary, 104 | ExtendedMsg <$> arbitrary <*> arbitrary, 105 | Cancel <$> pos <*> arbitrary, 106 | let bc :: Gen B.ByteString 107 | bc = do b <- arbitrary :: Gen BCode.BCode 108 | return $ BCode.encode b 109 | in ExtendedMsg 0 <$> bc, 110 | Port <$> choose (0,16383)] 111 | where 112 | pos :: Gen Int 113 | pos = choose (0, 4294967296 - 1) 114 | 115 | 116 | -- | The Protocol header for the Peer Wire Protocol 117 | protocolHeader :: String 118 | protocolHeader = "BitTorrent protocol" 119 | 120 | p8 :: Word8 -> Put 121 | p8 = putWord8 122 | 123 | p32be :: Integral a => a -> Put 124 | p32be = putWord32be . fromIntegral 125 | 126 | decodeMsg :: Get Message 127 | decodeMsg = {-# SCC "decodeMsg" #-} get 128 | 129 | encodePacket :: Message -> L.ByteString 130 | encodePacket m = L.fromChunks [szEnc, mEnc] 131 | where mEnc = encode m 132 | sz = B.length mEnc 133 | szEnc = runPut . p32be $ sz 134 | 135 | instance Serialize Message where 136 | put KeepAlive = return () 137 | put Choke = p8 0 138 | put Unchoke = p8 1 139 | put Interested = p8 2 140 | put NotInterested = p8 3 141 | put (Have pn) = p8 4 *> p32be pn 142 | put (BitField bf) = p8 5 *> putByteString bf 143 | put (Request pn (Block os sz)) 144 | = p8 6 *> mapM_ p32be [pn,os,sz] 145 | put (Piece pn os c) = p8 7 *> mapM_ p32be [pn,os] *> putByteString c 146 | put (Cancel pn (Block os sz)) 147 | = p8 8 *> mapM_ p32be [pn,os,sz] 148 | put (Port p) = p8 9 *> (putWord16be . fromIntegral $ p) 149 | put (Suggest pn) = p8 0x0D *> p32be pn 150 | put (ExtendedMsg idx bs) 151 | = p8 20 *> p8 (fromIntegral idx) *> putByteString bs 152 | put HaveAll = p8 0x0E 153 | put HaveNone = p8 0x0F 154 | put (RejectRequest pn (Block os sz)) 155 | = p8 0x10 *> mapM_ p32be [pn,os,sz] 156 | put (AllowedFast pn) 157 | = p8 0x11 *> p32be pn 158 | 159 | get = getKA <|> getChoke 160 | <|> getUnchoke <|> getIntr 161 | <|> getNI <|> getHave 162 | <|> getBF <|> getReq 163 | <|> getPiece <|> getCancel 164 | <|> getPort <|> getHaveAll 165 | <|> getHaveNone 166 | <|> getSuggest <|> getRejectRequest 167 | <|> getAllowedFast 168 | <|> getExtendedMsg 169 | 170 | -- | Run attoparsec-based parser on input 171 | getMsg :: Parser Message 172 | getMsg = do 173 | l <- apW32be 174 | if l == 0 175 | then return KeepAlive 176 | else getAPMsg l 177 | 178 | getAPMsg :: Int -> Parser Message 179 | getAPMsg l = do 180 | c <- A.anyWord8 181 | case c of 182 | 0 -> return Choke 183 | 1 -> return Unchoke 184 | 2 -> return Interested 185 | 3 -> return NotInterested 186 | 4 -> (Have <$> apW32be) 187 | 5 -> (BitField <$> (A.take (l-1))) 188 | 6 -> (Request <$> apW32be <*> (Block <$> apW32be <*> apW32be)) 189 | 7 -> (Piece <$> apW32be <*> apW32be <*> A.take (l - 9)) 190 | 8 -> (Cancel <$> apW32be <*> (Block <$> apW32be <*> apW32be)) 191 | 9 -> (Port . 
fromIntegral <$> apW16be) 192 | 0x0D -> (Suggest <$> apW32be) 193 | 0x0E -> return HaveAll 194 | 0x0F -> return HaveNone 195 | 0x10 -> (RejectRequest <$> apW32be <*> (Block <$> apW32be <*> apW32be)) 196 | 0x11 -> (AllowedFast <$> apW32be) 197 | 20 -> (ExtendedMsg <$> A.anyWord8 <*> A.take (l - 2)) 198 | k -> fail $ "Illegal parse, code: " ++ show k 199 | 200 | apW32be :: Parser Int 201 | apW32be = do 202 | [b1,b2,b3,b4] <- replicateM 4 A.anyWord8 203 | let b1' = fromIntegral b1 204 | b2' = fromIntegral b2 205 | b3' = fromIntegral b3 206 | b4' = fromIntegral b4 207 | return (b4' + (256 * b3') + (256 * 256 * b2') + (256 * 256 * 256 * b1')) 208 | 209 | apW16be :: Parser Int 210 | apW16be = do 211 | [b1, b2] <- replicateM 2 A.anyWord8 212 | let b1' = fromIntegral b1 213 | b2' = fromIntegral b2 214 | return (b2' + 256 * b1') 215 | 216 | 217 | getBF, getChoke, getUnchoke, getIntr, getNI, getHave, getReq :: Get Message 218 | getPiece, getCancel, getPort, getKA :: Get Message 219 | getRejectRequest, getAllowedFast, getSuggest, getHaveAll, getHaveNone :: Get Message 220 | getExtendedMsg :: Get Message 221 | getChoke = byte 0 *> return Choke 222 | getUnchoke = byte 1 *> return Unchoke 223 | getIntr = byte 2 *> return Interested 224 | getNI = byte 3 *> return NotInterested 225 | getHave = byte 4 *> (Have <$> gw32) 226 | getBF = byte 5 *> (BitField <$> (remaining >>= getByteString . fromIntegral)) 227 | getReq = byte 6 *> (Request <$> gw32 <*> (Block <$> gw32 <*> gw32)) 228 | getPiece = byte 7 *> (Piece <$> gw32 <*> gw32 <*> (remaining >>= getByteString)) 229 | getCancel = byte 8 *> (Cancel <$> gw32 <*> (Block <$> gw32 <*> gw32)) 230 | getPort = byte 9 *> (Port . fromIntegral <$> getWord16be) 231 | getSuggest = byte 0x0D *> (Suggest <$> gw32) 232 | getHaveAll = byte 0x0E *> return HaveAll 233 | getHaveNone = byte 0x0F *> return HaveNone 234 | getRejectRequest = byte 0x10 *> (RejectRequest <$> gw32 <*> (Block <$> gw32 <*> gw32)) 235 | getAllowedFast = byte 0x11 *> (AllowedFast <$> gw32) 236 | getExtendedMsg = byte 20 *> (ExtendedMsg <$> getWord8 <*> (remaining >>= getByteString)) 237 | getKA = do 238 | empty <- isEmpty 239 | if empty 240 | then return KeepAlive 241 | else fail "Non empty message - not a KeepAlive" 242 | 243 | gw32 :: Integral a => Get a 244 | gw32 = fromIntegral <$> getWord32be 245 | 246 | byte :: Word8 -> Get Word8 247 | byte w = do 248 | x <- lookAhead getWord8 249 | if x == w 250 | then getWord8 251 | else fail $ "Expected byte: " ++ show w ++ " got: " ++ show x 252 | 253 | -- | Size of the protocol header 254 | protocolHeaderSize :: Int 255 | protocolHeaderSize = length protocolHeader 256 | 257 | -- | Protocol handshake code. This encodes the protocol handshake part 258 | protocolHandshake :: L.ByteString 259 | protocolHandshake = L.fromChunks . map runPut $ 260 | [putWord8 $ fromIntegral protocolHeaderSize, 261 | mapM_ (putWord8 . fromIntegral . ord) protocolHeader, 262 | putWord64be extensionBasis] 263 | 264 | toBS :: String -> B.ByteString 265 | toBS = B.pack . map toW8 266 | 267 | toW8 :: Char -> Word8 268 | toW8 = fromIntegral . ord 269 | 270 | 271 | -- | Receive the header parts from the other end 272 | receiveHeader :: Socket -> Int -> (InfoHash -> Bool) 273 | -> IO (Either String ([Capabilities], L.ByteString, InfoHash)) 274 | receiveHeader sock sz ihTst = parseHeader `fmap` loop [] sz 275 | where parseHeader = runGet (headerParser ihTst) 276 | loop :: [B.ByteString] -> Int -> IO B.ByteString 277 | loop bs z | z == 0 = return . 
B.concat $ reverse bs 278 | | otherwise = do 279 | nbs <- recv sock z 280 | when (B.length nbs == 0) $ fail "Socket is dead" 281 | loop (nbs : bs) (z - B.length nbs) 282 | 283 | 284 | headerParser :: (InfoHash -> Bool) -> Get ([Capabilities], L.ByteString, InfoHash) 285 | headerParser ihTst = do 286 | hdSz <- getWord8 287 | when (fromIntegral hdSz /= protocolHeaderSize) $ fail "Wrong header size" 288 | protoString <- getByteString protocolHeaderSize 289 | when (protoString /= toBS protocolHeader) $ fail "Wrong protocol header" 290 | caps <- getWord64be 291 | ihR <- getByteString 20 292 | unless (ihTst ihR) $ fail "Unknown InfoHash" 293 | pid <- getLazyByteString 20 294 | return (decodeCapabilities caps, pid, ihR) 295 | 296 | extensionBasis :: Word64 297 | extensionBasis = 298 | (flip setBit 2) -- Fast extension 299 | . (flip setBit 20) -- Extended messaging support 300 | $ 0 301 | 302 | decodeCapabilities :: Word64 -> [Capabilities] 303 | decodeCapabilities w = catMaybes 304 | [ if testBit w 2 then Just Fast else Nothing, 305 | if testBit w 20 then Just Extended else Nothing ] 306 | 307 | -- | Initiate a handshake on a socket 308 | initiateHandshake :: Socket -> PeerId -> InfoHash 309 | -> IO (Either String ([Capabilities], L.ByteString, InfoHash)) 310 | initiateHandshake sock peerid infohash = do 311 | debugM "Protocol.Wire" "Sending off handshake message" 312 | _ <- Lz.send sock msg 313 | debugM "Protocol.Wire" "Receiving handshake from other end" 314 | receiveHeader sock sz (== infohash) 315 | where msg = handShakeMessage peerid infohash 316 | sz = fromIntegral (L.length msg) 317 | 318 | -- | Construct a default handshake message from a PeerId and an InfoHash 319 | handShakeMessage :: PeerId -> InfoHash -> L.ByteString 320 | handShakeMessage pid ih = 321 | L.fromChunks . map runPut $ [putLazyByteString protocolHandshake, 322 | putByteString ih, 323 | putByteString . toBS $ pid] 324 | 325 | -- | Receive a handshake on a socket 326 | receiveHandshake :: Socket -> PeerId -> (InfoHash -> Bool) 327 | -> IO (Either String ([Capabilities], L.ByteString, InfoHash)) 328 | receiveHandshake s pid ihTst = do 329 | debugM "Protocol.Wire" "Receiving handshake from other end" 330 | r <- receiveHeader s sz ihTst 331 | case r of 332 | Left err -> return $ Left err 333 | Right (caps, rpid, ih) -> 334 | do debugM "Protocol.Wire" "Sending back handshake message" 335 | _ <- Lz.send s (msg ih) 336 | return $ Right (caps, rpid, ih) 337 | where msg ih = handShakeMessage pid ih 338 | sz = fromIntegral (L.length $ msg (B.pack $ replicate 20 32)) -- Dummy value 339 | 340 | 341 | -- | The call @constructBitField sz pieces@ will return a ByteString suitable for inclusion in a 342 | -- BITFIELD message to a peer. 343 | constructBitField :: Int -> [PieceNum] -> B.ByteString 344 | constructBitField sz pieces = B.pack . 
build $ m 345 | where m = map (`elem` pieces) [0..sz-1 + pad] 346 | pad = case sz `mod` 8 of 347 | 0 -> 0 348 | n -> 8 - n 349 | build [] = [] 350 | build l = let (first, rest) = splitAt 8 l 351 | in if length first /= 8 352 | then error "Wrong bitfield" 353 | else bytify first : build rest 354 | bytify [b7,b6,b5,b4,b3,b2,b1,b0] = sum [if b0 then 1 else 0, 355 | if b1 then 2 else 0, 356 | if b2 then 4 else 0, 357 | if b3 then 8 else 0, 358 | if b4 then 16 else 0, 359 | if b5 then 32 else 0, 360 | if b6 then 64 else 0, 361 | if b7 then 128 else 0] 362 | bytify _ = error "Bitfield construction failed" 363 | 364 | -- 365 | -- -- TESTS 366 | testSuite :: Test 367 | testSuite = testGroup "Protocol/Wire" 368 | [ testProperty "QC encode-decode/id" propEncodeDecodeId 369 | , testProperty "QC encode-decode/id - attoparsec" propEncodeDecodeIdAP ] 370 | 371 | 372 | propEncodeDecodeId :: Message -> Bool 373 | propEncodeDecodeId m = 374 | let encoded = encode m 375 | decoded = decode encoded 376 | in 377 | Right m == decoded 378 | 379 | propEncodeDecodeIdAP :: Message -> Bool 380 | propEncodeDecodeIdAP m = 381 | let encoded = encodePacket m 382 | decoded = A.parse getMsg $ B.concat $ L.toChunks encoded 383 | in case decoded of 384 | A.Done r m2 -> B.null r && m == m2 385 | _ -> False 386 | 387 | -------------------------------------------------------------------------------- /doc/haskell-vs-erlang.mkd: -------------------------------------------------------------------------------- 1 | Haskell vs. Erlang 2 | ================== 3 | 4 | Since I wrote a bittorrent client in both Erlang and Haskell, etorrent and 5 | combinatorrent respectively, I decided to put up some bait. This might erupt in 6 | a language war and "My language is better than yours", but I feel I am 7 | obligated to write something subjective. Here are the woes of programming in 8 | Haskell and Erlang. 9 | 10 | Neither Haskell, nor Erlang was a first language for me. I have programmed 11 | serious programs in C, Standard ML, Ocaml, Python, Java and Perl; tasted the cake 12 | of Go, Javascript, Scheme and Ruby; and have written substantial stuff in Coq and Twelf. 13 | I love static type systems, a bias that will rear its ugly head and breathe 14 | fire. 15 | 16 | I have written Haskell code seriously since 2005 and Erlang code seriously 17 | since 2007. I have programmed functionally since 1997 or so. My toilet reading 18 | currently is "Categories for the working mathematician" by Mac Lane. Ten years 19 | ago it was "ML for the working programmer" by Paulson. 20 | 21 | Enough about me. 22 | 23 | Caveats: 24 | -------- 25 | 26 | With any language war material follows a disclaimer and a healthy dose of 27 | caveats. This is subjective. You have to live with it being subjective. My 28 | writing can't be objective and colorful at the same time. And I like colors in 29 | my life. Also, it is no fun reading a table listing the comparison. Rather, I 30 | will try to make it into a good foil duel with attacks, parries, guards, 31 | pierces, bananas, and barbed wire. 32 | 33 | I built etorrent in Erlang first and combinatorrent in Haskell second. Hence, 34 | the second time around, with the sole goal of redoing the functionality of 35 | etorrent, the work was much easier and proceeded much faster. The Erlang code is 36 | slightly fattier at 4.2K lines versus 3.6K lines of Haskell (SLOCs). The 37 | performance of the two clients is roughly equal, but more time was spent 38 | optimizing the Haskell code. 
39 | 40 | My hypothesis is this: The Erlang VM is much better at IO than the GHC compiler 41 | I wield and use for Haskell. GHC kills the Erlang VM for everything else 42 | though, perhaps including message passing. 43 | 44 | Also, the quality of the Erlang code could be better relative to 45 | the Haskell code. 46 | 47 | Enough! 48 | 49 | Enough with the caveats! 50 | 51 | Haskell cons: 52 | ------------- 53 | 54 | What weighs against using Haskell for the project? First is laziness. Sometimes 55 | you want your code to be strict and sometimes lazy. In combinatorrent, we do 56 | some statistics which we don't really need to calculate unless we want to 57 | present them. Stuff like bytes uploaded and downloaded for instance. Since you 58 | do not necessarily ask for these statistics, the compiler is free to build up 59 | thunks of the calculation and you have a neat little space leak. This is a 60 | recurring problem until you learn how to harness the strictness annotations 61 | of Haskell. Then the problem disappears. 62 | 63 | IO in Haskell is somewhat weak if you naively assume a String is fast. But 64 | there is help from Bytestrings, attoparsec and low-level Socket networking. 65 | Combinatorrent could use more help with getting the speed up here. I have 66 | substituted the IO layer's lowest level some 2-3 times in combinatorrent. 67 | Contrast this with Erlang, where the original protocol parser and IO layer are the ones 68 | still standing. 69 | 70 | The GHC compiler has comparatively more performance regressions than 71 | the Erlang VM. It should come as no surprise: GHC is acting as both a research 72 | vehicle and a compiler implementation. I want to stress however, that this has 73 | not worried me a lot. When asking the GHC developers for help, the response has 74 | been fast and helpful, and in every case it was easy to fix or work around. 75 | Also, change is a necessary thing if you want to improve. 76 | 77 | Haskell pros: 78 | ------------- 79 | 80 | Haskell has one very cool thing: Static typing (remember the bias!). The type 81 | system of Haskell is the most advanced type system for a general purpose 82 | language in existence. The only systems which can beat it are theorem provers 83 | like Coq, and they are not general purpose programming languages (Morriset and 84 | the YNot team might disagree though!). Static typing has some really cool 85 | merits. Bugs are caught fast and early; types ensure few corner cases in the 86 | programs (why check for null when it can't be represented). The types are my 87 | program skeleton and the program inhabiting the type is the flesh. Getting the 88 | skeleton right yields small and succinct programs. The abstraction 89 | possibilities from this are unparalleled in any language I have seen (and I've 90 | seen a few). 91 | 92 | The GHC compiler provides programs which have excellent execution speed. You 93 | don't need to worry a lot about speed when the compiler simply fixes most of 94 | the problems for you. This in turn means that you can write abstract code 95 | without worrying too much about the result. This yields vastly more general and 96 | simpler programs. 97 | 98 | One very big difference in the implementations is that of STM channels versus 99 | Erlang's message passing. In Erlang, each process has a mailbox of unbounded 100 | size. You send messages to the mailbox, identified by the process ID of the 101 | mailbox owner. In Haskell, we use STM Channels for most communication. 
Thus, 102 | you send messages not to the PID of a process, but to a specific channel. This 103 | effectively changes some rules in channel network configuration. In Erlang you 104 | must either globally register a process or propagate PIDs. In Haskell, channels 105 | are created and then propagated to communicating parties. I find the Haskell 106 | approach considerably easier - but also note that in a statically typed 107 | language, channels are the way to go. The sum type for a PID mailbox would be 108 | cumbersome in comparison. 109 | 110 | Haskell has excellent library and data structure support. For instance you have 111 | access to priority search queues via Hackage. PSQueues are useful for 112 | implementing the piece histogram in a bittorrent client: knowing how rare a 113 | given piece is so you can seek to fetch the rarest first. 114 | 115 | Haskell can create (im-)mutable (un-)boxed arrays. These are useful in a 116 | bittorrent client in several places. Immutable arrays for storing knowledge 117 | about pieces are an example. Or bit-arrays for storing knowledge about the 118 | pieces a given peer has. Erlang has no easy access to these and no guarantee of 119 | the data representation. 120 | 121 | Bryan O'Sullivan's attoparsec library allows for incremental parsing. When you 122 | get a new chunk of data from the network, you feed it to attoparsec. It will 123 | either give you a parsed message and the remaining bytes, or it will hand you 124 | back a continuation. This continuation, if invoked with more food, will 125 | continue the parsing. For network sockets the incrementality is pure win. 126 | 127 | The GHC compiler has some awesome profiling tools, including a powerful heap 128 | profiler. Using this, the run-time and memory usage of combinatorrent was 129 | brought down. 130 | 131 | Finally, testing in Haskell is easy. QuickCheck and Test.Framework provide a 132 | --tests target built into the combinatorrent binary itself. Self tests are 133 | easy. 134 | 135 | Haskell mistakes: 136 | ----------------- 137 | 138 | I made some mistakes when writing the Haskell client. For one I relied on the 139 | CML library until I realized STM would do an equal or better job. The number of 140 | Haskell developers with STM experience compared to the CML head-count made the 141 | decision to change easy. 142 | 143 | Furthermore, I should have focused on laziness earlier in the process. The 144 | first combinatorrent releases leak memory because of lazy thunk buildup. The 145 | later versions, after I understood it intuitively, do not leak. 146 | 147 | Erlang cons: 148 | ------------ 149 | 150 | In Erlang, dynamic typing is the norm. Rather than enforce typing, you can get 151 | warnings by a type analyzer tool, the dialyzer, if need be. Running this on the 152 | code is a good idea to weed out some problems quickly. When building etorrent I 153 | had much use of the dialyzer and used an at-the-time experimental extension: 154 | spec() specifications. Yet, I think that 19/20 errors in my Erlang programs 155 | were errors which a type system would have caught easily. This means you spend 156 | more time actually running the program and observing its behavior. Also note 157 | that dynamic typing hurts less in Erlang compared to other languages. A process 158 | is comprehensible in its own right and that reduces the interface to the 159 | process communication - a much simpler task. 160 | 161 | Etorrent has less stability than combinatorrent and has erred more. 
Yet, this 162 | is no problem for a bittorrent client since the supervisor-tree in Erlang/OTP 163 | will automatically restart broken parts of the system. For a bittorrent client 164 | we can live with a death once a week or once a day without any troubles. 165 | 166 | You have no mutability in Erlang and you have far fewer options for data 167 | representation. This in turn makes certain algorithms rather hard to express, or 168 | you have to opt for a variant with larger space usage. There was no 169 | Cabal equivalent at the time I wrote the code and thus fewer libraries to 170 | choose from. 171 | 172 | For the built-in libraries, the HTTP library was more strict with respect to 173 | correctness. In turn, many trackers would not communicate with it and I had to 174 | provide a wrapper around the library. Today, this might have changed though. 175 | Haskell's HTTP library worked out of the box with no changes. 176 | 177 | Erlang's syntax, compared to Haskell's, is ugly, clunky and cumbersome. Make no 178 | mistake though: Tanks are ugly, clunky and cumbersome. It does not make tanks 179 | less menacing. 180 | 181 | Erlang pros: 182 | ------------ 183 | 184 | One application is SASL. SASL is a system logger which will record in a 185 | ring-buffer any kind of process death and process restart. I used this a lot 186 | when developing. I would load a couple of torrents in the client and go to bed. 187 | Next morning I would check the SASL log for any error that might have occurred 188 | and fix those bugs. This way of developing is good for a bittorrent client: 189 | utmost stability is not needed. We just need to get the number of errors below a 190 | certain threshold. Rather than waste time fixing a bug which only occurs once 191 | every year, we can concentrate on the things that matter. 192 | 193 | The IO layer in Erlang's VM is FAST! It is written in C, and it is optimized 194 | heavily because this is what Erlang does best. For file IO it uses asynchronous 195 | threads to circumvent having to wait on the kernel. For the network, it plugs 196 | into epoll() getting good performance in turn. 197 | 198 | The Beam VM of Erlang is a beast of stability. Basically, it doesn't quit 199 | unless you nuke it from orbit. One of the smaller things I learned some weeks 200 | ago was the rudimentary flow control trick. Erlang schedules by counting 201 | reductions in an Erlang process and then switching process context when it has 202 | no more reductions in its time share. Sending a message never fails but it 203 | costs reductions proportional to the queue size of the receiving process. 204 | Hence, many senders have a harder time overloading a single receiver. The trick 205 | is simple, easily implementable and provides some simple flow control. While 206 | not fail-safe, it ups the ante for when communication overload happens. 207 | 208 | Erlang has OTP, the Open Telecom Platform, which is a callback-framework for 209 | processes. You implement a set of callbacks and hand over control to the 210 | OTP-portion of your process. OTP then handles a lot of the ugly, gritty details 211 | leaving your part simple. OTP also provides the supervision of processes, 212 | restarting them if they err. Supervisor-processes form trees so *they* are in 213 | turn supervised. It isn't turtles all the way down in an Erlang VM... 214 | 215 | Erlang executes fast enough for most things. Haskell gives you faster 216 | execution, but Erlang was more than adequate for a bittorrent client in the 217 | speed department. 
As an example of how this plays together with the IO layer, 218 | an early version of etorrent could sustain 700 megabit network load on a local 219 | network of 1 gigabit when seeding. The current version of etorrent can do the 220 | same as a seeder I suspect. Also, message passing in Erlang is blazing fast. 221 | It feels like a function call - a key to good Erlang I think. 222 | 223 | The Erlang shell can easily be used as a poor man's user interface. Etorrent 224 | simply responds to some functions in the shell, showing status of the running 225 | system. I suspect GHCi can do the same, but I never got around to doing it and 226 | it doesn't seem as easy to pull off. 227 | 228 | I love the Erlang way of programming. You assume your code does the right thing 229 | and let it crash otherwise. If it crashes too often you handle that case. Code 230 | is not littered with error handling for things that never happen, and should it 231 | happen occasionally, the supervisor tree saves the day. 232 | 233 | 234 | Erlang mistakes: 235 | ---------------- 236 | 237 | Unfortunately, I made a number of mistakes in Etorrent. Most of these have to do 238 | with being the first version. Fred P. Brooks hinted that you want to throw away 239 | things when building the first version. And I did. I used ETS tables in places 240 | where they are not good. ETS is a table in which you can store any Erlang term 241 | and later retrieve it. They give you a way to circumvent the representation 242 | limitation in Erlang. But they are no silver bullet: When you pull out a term, 243 | you copy it to the process pulling it. When your terms are 4-8 megabytes in 244 | size, that hurts a lot. 245 | 246 | I relied far too much on mnesia, the database in Erlang. Mnesia is basically 247 | using software-transactional-memory so locking is optimistic. When you have 248 | something like 80 writers wanting access to the same row in a table, then the 249 | system starves. Also, there is no need for a bittorrent application to require 250 | a mnesia store. A simple serialization of key data to a file based on a timer 251 | is more than adequate. 252 | 253 | I made several mistakes in the process model. I thought that choking was local 254 | to a torrent while in reality it is a global thing for all torrents currently 255 | being downloaded. These reorganizations require quite some refactoring - and, 256 | lacking static typing, these are somewhat more expensive 257 | compared to Haskell refactorings. 258 | 259 | I thought autotools were a good idea. They are not. Autotools is the Maven of C 260 | programming. 261 | 262 | Finally, I skipped unit-tests. In a dynamically typed environment you need lots 263 | and lots of these. But I decided against them early on. In hindsight this was 264 | probably a mistake. While unit-testing Erlang code is hard, it is by no means 265 | impossible. 266 | 267 | Future: 268 | ------- 269 | 270 | The future brings exciting things with it. I will continue Combinatorrent 271 | development. I am almost finished with the Fast-extension (BEP 0006) for 272 | combinatorrent and have some more optimization branches ready as well. I still 273 | follow Erlang in general because it is an interesting language with a lot of 274 | cool uses. I do check that etorrent compiles on new releases of Erlang. If 275 | anyone shows interest in any of the client implementations, feel free to 276 | contact me. I will happily answer questions. 277 | 278 | There is no clear winner in the duel. 
I *prefer* Haskell, but I am biased and 279 | believe in static typing. Yet I *like* programming in Erlang - both languages 280 | are good from different perspectives. 281 | 282 | -------------------------------------------------------------------------------- /src/Process/ChokeMgr.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric, TypeSynonymInstances, FlexibleContexts, TupleSections #-} 2 | module Process.ChokeMgr ( 3 | -- * Types, Channels 4 | ChokeMgrChannel 5 | , RateTVar 6 | , PeerRateInfo(..) 7 | , ChokeMgrMsg(..) 8 | -- * Interface 9 | , start 10 | ) 11 | where 12 | 13 | import Control.Concurrent 14 | import Control.Concurrent.STM 15 | import Control.DeepSeq 16 | import Control.Exception (assert) 17 | import Control.Monad.Reader 18 | import Control.Monad.State 19 | 20 | import Data.Function 21 | import Data.List 22 | import qualified Data.Map as M 23 | import qualified Data.Set as S 24 | import Data.Traversable as T 25 | import GHC.Generics 26 | 27 | import Prelude hiding (log) 28 | 29 | import System.Random 30 | 31 | import Channels hiding (Peer) 32 | import Process 33 | import Process.Timer 34 | import Supervisor 35 | import Torrent hiding (infoHash) 36 | 37 | -- DATA STRUCTURES 38 | ---------------------------------------------------------------------- 39 | 40 | -- | Messages to the Choke Manager 41 | data ChokeMgrMsg = Tick 42 | -- ^ Request that we run another round 43 | | RemovePeer ThreadId 44 | -- ^ Request that this peer is removed 45 | | AddPeer InfoHash ThreadId PeerChannel 46 | -- ^ Request that this peer is added 47 | | PieceDone InfoHash PieceNum 48 | -- ^ Note that a given piece is done 49 | | BlockComplete InfoHash PieceNum Block 50 | -- ^ Note that a block is complete (endgame) 51 | | TorrentComplete InfoHash 52 | -- ^ Note that the torrent in question is complete 53 | deriving (Generic) 54 | 55 | instance NFData ChokeMgrMsg 56 | 57 | 58 | type ChokeMgrChannel = TChan ChokeMgrMsg 59 | data PeerRateInfo = PRI { 60 | peerUpRate :: Double, 61 | peerDownRate :: Double, 62 | peerInterested :: Bool, 63 | peerSeeding :: Bool, 64 | peerSnubs :: Bool, 65 | peerChokingUs :: Bool } 66 | deriving Show 67 | 68 | type RateTVar = TVar [(ThreadId, PeerRateInfo)] 69 | 70 | data CF = CF { mgrCh :: ChokeMgrChannel 71 | , rateTV :: RateTVar } 72 | 73 | instance Logging CF where 74 | logName _ = "Process.ChokeMgr" 75 | 76 | -- PeerDB described below 77 | type ChokeMgrProcess a = Process CF PeerDB a 78 | 79 | -- INTERFACE 80 | ---------------------------------------------------------------------- 81 | 82 | roundTickSecs :: Int 83 | roundTickSecs = 11 84 | 85 | start :: ChokeMgrChannel -> RateTVar -> Int -> SupervisorChannel 86 | -> IO ThreadId 87 | start ch rtv ur supC = do 88 | _ <- registerSTM roundTickSecs ch Tick 89 | spawnP (CF ch rtv) (initPeerDB $ calcUploadSlots ur Nothing) 90 | ({-# SCC "ChokeMgr" #-} catchP pgm 91 | (defaultStopHandler supC)) 92 | where 93 | initPeerDB slots = PeerDB 2 slots S.empty M.empty [] 94 | pgm = do 95 | msg <- liftIO . 
atomically $ readTChan ch 96 | case msg of 97 | Tick -> tick 98 | RemovePeer t -> removePeer t 99 | AddPeer ih t pCh -> do 100 | debugP $ "Adding peer " ++ show (ih, t) 101 | addPeer pCh ih t 102 | BlockComplete ih pn blk -> informBlockComplete ih pn blk 103 | PieceDone ih pn -> informDone ih pn 104 | TorrentComplete ih -> modify (\s -> s { seeding = S.insert ih $ seeding s }) 105 | pgm 106 | tick = do debugP "Ticked" 107 | c <- asks mgrCh 108 | _ <- registerSTM roundTickSecs c Tick 109 | updateDB 110 | runRechokeRound 111 | removePeer tid = do debugP $ "Removing peer " ++ show tid 112 | modify (\db -> db { chain = filter (not . isPeer tid) (chain db) 113 | , rateMap = M.delete tid (rateMap db) }) 114 | isPeer tid pr | tid == pThreadId pr = True 115 | | otherwise = False 116 | 117 | -- INTERNAL FUNCTIONS 118 | ---------------------------------------------------------------------- 119 | 120 | -- The data structure is split into pieces so it is easier to manipulate. 121 | -- The PeerDB is the state we thread around in the process. The PChain contains all 122 | -- the important information about processes. 123 | type PChain = [Peer] 124 | 125 | -- | Main data for a peer 126 | data Peer = Peer 127 | { pThreadId :: ThreadId 128 | , pInfoHash :: InfoHash 129 | , pChannel :: PeerChannel 130 | } 131 | 132 | instance Show Peer where 133 | show (Peer tid _ _) = "(Peer " ++ show tid ++ "...)" 134 | 135 | -- | Peer upload and download ratio 136 | data PRate = PRate { pUpRate :: Double, 137 | pDownRate :: Double } 138 | deriving Show 139 | -- | Current State of the peer 140 | data PState = PState { pChokingUs :: Bool -- ^ True if the peer is choking us 141 | , pInterestedInUs :: Bool -- ^ Reflection from Peer DB 142 | , pIsASeeder :: Bool -- ^ True if the peer is a seeder 143 | , pIsSnubbed :: Bool -- ^ True if peer snubs us 144 | } 145 | deriving Show 146 | 147 | type RateMap = M.Map ThreadId (PRate, PState) 148 | data PeerDB = PeerDB 149 | { chokeRound :: Int -- ^ Counted down by one from 2. If 0 then we should 150 | -- advance the peer chain. (Optimistic Unchoking) 151 | , uploadSlots :: Int -- ^ Current number of upload slots 152 | , seeding :: S.Set InfoHash -- ^ Set of torrents we seed 153 | , rateMap :: RateMap -- ^ Map from Peer ThreadIds to state 154 | , chain :: PChain -- ^ The order in which peers are optimistically unchoked 155 | } 156 | 157 | -- | Update the Peer Database with the newest information from peers 158 | updateDB :: ChokeMgrProcess () 159 | updateDB = do 160 | rc <- asks rateTV 161 | rateUpdate <- liftIO . 
atomically $ do 162 | q <- readTVar rc 163 | writeTVar rc [] 164 | return q 165 | case rateUpdate of 166 | [] -> return () 167 | updates -> let f old (tid, pri) = 168 | M.insert tid (PRate { pUpRate = peerUpRate pri, 169 | pDownRate = peerDownRate pri }, 170 | PState { pInterestedInUs = peerInterested pri, 171 | pIsASeeder = peerSeeding pri, 172 | pChokingUs = peerChokingUs pri, 173 | pIsSnubbed = peerSnubs pri}) old 174 | nm m = foldl f m $ reverse updates 175 | in do 176 | debugP $ "Rate updates since last round: " ++ show updates 177 | modify (\db -> db { rateMap = nm (rateMap db) }) 178 | 179 | addPeer :: PeerChannel -> InfoHash -> ThreadId -> ChokeMgrProcess () 180 | addPeer ch ih t = do 181 | chn <- gets chain 182 | pt <- liftIO $ getStdRandom (\gen -> randomR (0, length chn - 1) gen) 183 | let (front, back) = splitAt pt chn 184 | modify (\db -> db { chain = (front ++ initPeer : back) }) 185 | where initPeer = Peer t ih ch 186 | 187 | runRechokeRound :: ChokeMgrProcess () 188 | runRechokeRound = do 189 | cRound <- gets chokeRound 190 | if (cRound == 0) 191 | then do advancePeerChain 192 | modify (\db -> db { chokeRound = 2 }) 193 | else modify (\db -> db { chokeRound = (chokeRound db) - 1 }) 194 | rechoke 195 | 196 | -- | Advance the peer chain to the next peer eligible for optimistic 197 | -- unchoking. That is, skip peers which are not interested in our pieces 198 | -- and peers which are not choking us. The former we can't send any data to, 199 | -- so we can't get better speeds at them. The latter are already sending us data, 200 | -- so we know how good they are as peers. 201 | advancePeerChain :: ChokeMgrProcess () 202 | advancePeerChain = do 203 | peers <- gets chain 204 | rd <- gets rateMap 205 | let (front, back) = break (breakPoint rd) peers 206 | modify (\db -> db { chain = back ++ front }) 207 | where 208 | breakPoint rd peer = 209 | case M.lookup (pThreadId peer) rd of 210 | Nothing -> True -- Really new peer, give it the chance :) 211 | Just (_, st) -> pInterestedInUs st && pChokingUs st 212 | 213 | rechoke :: ChokeMgrProcess () 214 | rechoke = do 215 | us <- gets uploadSlots 216 | chn <- gets chain 217 | sd <- gets seeding 218 | rm <- gets rateMap 219 | debugP $ "Chain is: " ++ show (map pThreadId chn) 220 | debugP $ "RateMap is: " ++ show rm 221 | let (seed, down) = splitSeedLeech sd rm chn 222 | debugP $ "Seeders " ++ show seed 223 | debugP $ "Downloaders " ++ show down 224 | electedPeers <- selectPeers us down seed 225 | performChokingUnchoking electedPeers chn 226 | 227 | -- | Function to split peers into those where we are seeding and those where we 228 | -- are leeching. Also prunes the list of peers which are not interested in us. 
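-- (Editor's illustration, not part of the original module; @peerA@ and @peerB@
-- are hypothetical peers.) Assuming a rate map @rm@ where @peerA@ is marked as
-- interested in us and its torrent is in the seeding set @sd@, while @peerB@ is
-- interested, not snubbed and its torrent is still being leeched, we would
-- expect roughly:
--
--   splitSeedLeech sd rm [peerA, peerB]  ==  ([peerA], [peerB])
--
-- Peers absent from the rate map, peers that are seeders themselves, snubbed
-- peers and peers not interested in us end up in neither list.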
229 | -- TODO: Snubbed peers 230 | splitSeedLeech :: S.Set InfoHash -> RateMap -> [Peer] -> ([Peer], [Peer]) 231 | splitSeedLeech seeders rm ps = foldl' splitter ([], []) ps 232 | where 233 | splitter (seeds, leeching) p = 234 | case M.lookup (pThreadId p) rm of 235 | Nothing -> (seeds, leeching) -- Know nothing on the peer yet 236 | Just (_, st) | pIsASeeder st || not (pInterestedInUs st) -> (seeds, leeching) 237 | | S.member (pInfoHash p) seeders -> (p : seeds, leeching) 238 | | pIsSnubbed st -> (seeds, leeching) 239 | | otherwise -> (seeds, p : leeching) 240 | 241 | -- | Comparison with inverse ordering 242 | compareInv :: Ord a => a -> a -> Ordering 243 | compareInv x y = 244 | case compare x y of 245 | LT -> GT 246 | EQ -> EQ 247 | GT -> LT 248 | 249 | comparingWith :: (a -> a -> Ordering) -> (b -> a) -> b -> b -> Ordering 250 | comparingWith comp project x y = 251 | comp (project x) (project y) 252 | 253 | -- | Leechers are sorted by their current download rate. We want to keep fast peers around. 254 | sortLeech :: [(Peer, (PRate, PState))] -> [(Peer, (PRate, PState))] 255 | sortLeech = sortBy $ comparingWith compareInv (pDownRate . fst . snd) 256 | 257 | -- | Seeders are sorted by their current upload rate. 258 | sortSeeds :: [(Peer, (PRate, PState))] -> [(Peer, (PRate, PState))] 259 | sortSeeds = sortBy $ comparingWith compareInv (pUpRate . fst . snd) 260 | 261 | 262 | -- | Calculate the number of upload slots we have available. If the 263 | -- number of slots is explicitly given, use that. Otherwise we 264 | -- choose the slots based on the current upload rate set. The faster 265 | -- the rate, the more slots we allow. 266 | calcUploadSlots :: Int -> Maybe Int -> Int 267 | calcUploadSlots _ (Just n) = n 268 | calcUploadSlots rate Nothing | rate <= 0 = 7 -- This is just a guess 269 | | rate < 9 = 2 270 | | rate < 15 = 3 271 | | rate < 42 = 4 272 | | otherwise = calcRate $ fromIntegral rate 273 | where calcRate :: Double -> Int 274 | calcRate x = round $ sqrt (x * 0.6) 275 | 276 | -- | The call @assignUploadSlots c ds ss@ will assume that we have @c@ 277 | -- slots for uploading at our disposal. The number @ds@ counts the peers 278 | -- that we would like to upload to among the torrents we are 279 | -- currently downloading. The number @ss@ is the same thing but for 280 | -- torrents that we seed. The function returns a pair @(kd,ks)@ 281 | -- where @kd@ is the number of downloader slots and @ks@ is the 282 | -- number of seeder slots. 283 | -- 284 | -- The function will move surplus slots around so all of them get used. 285 | assignUploadSlots :: Int -> Int -> Int -> (Int, Int) 286 | assignUploadSlots slots numDownPeers numSeedPeers = 287 | -- Shuffle surplus slots around so all get used 288 | shuffleSeeders . 
shuffleDownloaders $ (downloaderSlots, seederSlots) 289 | where 290 | -- Calculate the slots available for the downloaders and seeders 291 | -- We allocate 70% of them to leeching and 30% of them to seeding 292 | -- though we assign at least one slot to both 293 | slotRound :: Double -> Double -> Int 294 | slotRound ss fraction = max 1 $ round $ ss * fraction 295 | 296 | downloaderSlots = slotRound (fromIntegral slots) 0.7 297 | seederSlots = slotRound (fromIntegral slots) 0.3 298 | 299 | -- If there is a surplus of downloader slots, then assign them to 300 | -- the seeder slots 301 | shuffleDownloaders (dSlots, sSlots) = 302 | case max 0 (dSlots - numDownPeers) of 303 | 0 -> (dSlots, sSlots) 304 | k -> (dSlots - k, sSlots + k) 305 | 306 | -- If there is a surplus of seeder slots, then assign these to 307 | -- the downloader slots. Limit the downloader slots to the number 308 | -- of downloaders, however 309 | shuffleSeeders (dSlots, sSlots) = 310 | case max 0 (sSlots - numSeedPeers) of 311 | 0 -> (dSlots, sSlots) 312 | k -> (min (dSlots + k) numDownPeers, sSlots - k) 313 | 314 | -- | @selectPeers upSlots d s@ selects peers from a list of downloader peers @d@ and a list of seeder 315 | -- peers @s@. The value of @upSlots@ defines the number of upload slots available 316 | selectPeers :: Int -> [Peer] -> [Peer] -> ChokeMgrProcess (S.Set ThreadId) 317 | selectPeers ups downPeers seedPeers = do 318 | rm <- gets rateMap 319 | let selector p = maybe (p, (PRate 0.0 0.0, PState True False False False)) (p,) 320 | (M.lookup (pThreadId p) rm) 321 | dp = map selector downPeers 322 | sp = map selector seedPeers 323 | (nDownSlots, nSeedSlots) = assignUploadSlots ups (length downPeers) (length seedPeers) 324 | downPids = S.fromList $ map (pThreadId . fst) $ take nDownSlots $ sortLeech dp 325 | seedPids = S.fromList $ map (pThreadId . fst) $ take nSeedSlots $ sortSeeds sp 326 | debugP $ "Leechers: " ++ show (length downPeers) ++ ", Seeders: " ++ show (length seedPeers) 327 | debugP $ "Slots: " ++ show nDownSlots ++ " downloads, " ++ show nSeedSlots ++ " seeders" 328 | debugP $ "Electing peers - leechers: " ++ show downPids ++ "; seeders: " ++ show seedPids 329 | return $ assertSlots (nDownSlots + nSeedSlots) (S.union downPids seedPids) 330 | where assertSlots slots = assert (ups >= slots) 331 | 332 | -- | Send a message to the peer process at PeerChannel. Message is sent asynchronously 333 | -- to the peer in question. If the system is really loaded, this might 334 | -- actually fail since the order in which messages arrive might be inverted. 335 | msgPeer :: PeerChannel -> PeerChokeMsg -> ChokeMgrProcess () 336 | msgPeer ch = liftIO . atomically . writeTChan ch . FromChokeMgr 337 | 338 | -- | This function performs the choking and unchoking of peers in a round. 339 | performChokingUnchoking :: S.Set ThreadId -> [Peer] -> ChokeMgrProcess () 340 | performChokingUnchoking elected peers = 341 | do _ <- T.mapM unchoke electedPeers 342 | rm <- gets rateMap 343 | optChoke rm defaultOptimisticSlots nonElectedPeers 344 | where 345 | -- Partition the peers in elected and non-elected 346 | (electedPeers, nonElectedPeers) = partition (\rd -> S.member (pThreadId rd) elected) peers 347 | unchoke p = do 348 | debugP $ "Unchoking: " ++ show p 349 | msgPeer (pChannel p) UnchokePeer 350 | choke p = do 351 | debugP $ "Choking: " ++ show p 352 | msgPeer (pChannel p) ChokePeer 353 | 354 | -- If we have k optimistic slots, @optChoke k peers@ will unchoke the first 355 | -- @k@ peers interested in us. 
The rest will either be unchoked if they are 356 | -- not interested (ensuring fast start should they become interested); or 357 | -- they will be choked to avoid TCP/IP congestion. 358 | optChoke _rm _ [] = return () 359 | optChoke rm 0 (p : ps) = 360 | case M.lookup (pThreadId p) rm of 361 | Nothing -> choke p >> optChoke rm 0 ps 362 | Just (_, st) -> 363 | if pInterestedInUs st 364 | then choke p >> optChoke rm 0 ps 365 | else unchoke p >> optChoke rm 0 ps 366 | optChoke rm k (p : ps) = 367 | case M.lookup (pThreadId p) rm of 368 | Nothing -> unchoke p >> optChoke rm (k-1) ps 369 | Just (_, st) -> 370 | if pInterestedInUs st 371 | then unchoke p >> optChoke rm (k-1) ps 372 | else unchoke p >> optChoke rm k ps 373 | 374 | informDone :: InfoHash -> PieceNum -> ChokeMgrProcess () 375 | informDone ih pn = do 376 | chn <- gets chain 377 | T.mapM inform chn >> return () 378 | where inform p | (pInfoHash p) == ih = sendDone p >> return () 379 | | otherwise = return () 380 | sendDone p = msgPeer (pChannel p) (PieceCompleted pn) 381 | 382 | informBlockComplete :: InfoHash -> PieceNum -> Block -> ChokeMgrProcess () 383 | informBlockComplete ih pn blk = do 384 | chn <- gets chain 385 | T.mapM inform chn >> return () 386 | where inform p | (pInfoHash p) == ih = sendComp p >> return () 387 | | otherwise = return () 388 | sendComp p = msgPeer (pChannel p) (CancelBlock pn blk) 389 | 390 | --------------------------------------------------------------------------------