├── bytestring-0.9.cabal.config ├── htar ├── Setup.hs ├── LICENSE ├── htar.cabal └── htar.hs ├── Setup.lhs ├── README.md ├── bench └── Main.hs ├── LICENSE ├── test └── Properties.hs ├── changelog.md ├── Codec └── Archive │ ├── Tar │ ├── Entry.hs │ ├── Unpack.hs │ ├── Write.hs │ ├── Pack.hs │ ├── Check.hs │ ├── Read.hs │ ├── Index │ │ ├── StringTable.hs │ │ └── IntTrie.hs │ ├── Types.hs │ └── Index.hs │ └── Tar.hs ├── tar.cabal └── .travis.yml /bytestring-0.9.cabal.config: -------------------------------------------------------------------------------- 1 | constraints: bytestring==0.9.* 2 | -------------------------------------------------------------------------------- /htar/Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /htar/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CIFASIS/tar/master/htar/LICENSE -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | > import Distribution.Simple 2 | 3 | > main :: IO () 4 | > main = defaultMain 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | The `tar` Package [![Hackage](https://img.shields.io/hackage/v/tar.svg)](https://hackage.haskell.org/package/tar) [![Build Status](https://travis-ci.org/haskell/tar.svg)](https://travis-ci.org/haskell/tar) 2 | ================= 3 | 4 | See [`tar` on Hackage](https://hackage.haskell.org/package/tar) for more information. 
5 | -------------------------------------------------------------------------------- /bench/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import qualified Codec.Archive.Tar as Tar 4 | import qualified Codec.Archive.Tar.Index as TarIndex 5 | 6 | import qualified Data.ByteString.Lazy as BS 7 | import Control.Exception 8 | 9 | import Criterion 10 | import Criterion.Main 11 | 12 | main = defaultMain benchmarks 13 | 14 | benchmarks :: [Benchmark] 15 | benchmarks = 16 | [ env loadTarFile $ \tarfile -> 17 | bench "read" (nf Tar.read tarfile) 18 | 19 | , env loadTarEntriesList $ \entries -> 20 | bench "write" (nf Tar.write entries) 21 | 22 | , env loadTarEntries $ \entries -> 23 | bench "index build" (nf TarIndex.build entries) 24 | 25 | , env loadTarIndex $ \entries -> 26 | bench "index rebuild" (nf (TarIndex.finalise . TarIndex.unfinalise) entries) 27 | ] 28 | 29 | loadTarFile :: IO BS.ByteString 30 | loadTarFile = 31 | BS.readFile "01-index.tar" 32 | 33 | loadTarEntries :: IO (Tar.Entries Tar.FormatError) 34 | loadTarEntries = 35 | fmap Tar.read loadTarFile 36 | 37 | loadTarEntriesList :: IO [Tar.Entry] 38 | loadTarEntriesList = 39 | fmap (Tar.foldEntries (:) [] throw) loadTarEntries 40 | 41 | loadTarIndex :: IO TarIndex.TarIndex 42 | loadTarIndex = 43 | fmap (either throw id . TarIndex.build) 44 | loadTarEntries 45 | 46 | -------------------------------------------------------------------------------- /htar/htar.cabal: -------------------------------------------------------------------------------- 1 | name: htar 2 | version: 0.4.0.2 3 | license: BSD3 4 | license-File: LICENSE 5 | author: Bjorn Bringert 6 | Duncan Coutts 7 | maintainer: Duncan Coutts 8 | copyright: 2007 Bjorn Bringert 9 | 2008-2015 Duncan Coutts 10 | category: Codec 11 | synopsis: Command-line tar archive utility. 12 | description: A Command-line utility to create, extract and list the 13 | contents of tar archives. 
It can work with compressed 14 | archives using gzip or bzip2 compression. 15 | . 16 | This is in part a demo of the @tar@ library but it is also 17 | usable in place of the ordinary @tar@ program for systems 18 | like Windows that do not come with it as standard. 19 | build-type: Simple 20 | cabal-version: >= 1.6 21 | 22 | flag old-locale 23 | 24 | executable htar 25 | main-is: htar.hs 26 | ghc-options: -Wall 27 | build-depends: 28 | base == 4.*, 29 | time >= 1.1, 30 | directory >= 1.0, 31 | filepath >= 1.0, 32 | bytestring >= 0.9, 33 | tar == 0.4.* && >= 0.4.2, 34 | zlib >= 0.4 && < 0.7, 35 | bzlib >= 0.4 && < 0.7 36 | 37 | if flag(old-locale) 38 | build-depends: time < 1.5, old-locale >= 1.0 39 | else 40 | build-depends: time >= 1.5 41 | 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2007 Björn Bringert, 2 | 2008-2015 Duncan Coutts, 3 | 2011 Max Bolingbroke 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | - Redistributions of source code must retain the above copyright notice, 10 | this list of conditions and the following disclaimer. 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | - Neither the names of the copyright owners nor the names of the 15 | contributors may be used to endorse or promote products derived 16 | from this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /test/Properties.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import qualified Codec.Archive.Tar.Index as Index 4 | import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie 5 | import qualified Codec.Archive.Tar.Index.StringTable as StringTable 6 | import qualified Codec.Archive.Tar as Tar 7 | 8 | import qualified Data.ByteString as BS 9 | 10 | import Test.Tasty 11 | import Test.Tasty.QuickCheck 12 | 13 | main :: IO () 14 | main = 15 | defaultMain $ 16 | testGroup "tar tests" [ 17 | 18 | testGroup "write/read" [ 19 | testProperty "ustar format" Tar.prop_write_read_ustar, 20 | testProperty "gnu format" Tar.prop_write_read_gnu, 21 | testProperty "v7 format" Tar.prop_write_read_v7 22 | ] 23 | 24 | , testGroup "string table" [ 25 | testProperty "construction" StringTable.prop_valid, 26 | testProperty "sorted" StringTable.prop_sorted, 27 | testProperty "serialise" StringTable.prop_serialise_deserialise, 28 | testProperty "size" StringTable.prop_serialiseSize, 29 | testProperty "unfinalise" 
StringTable.prop_finalise_unfinalise 30 | ] 31 | 32 | , testGroup "int trie" [ 33 | testProperty "unit 1" IntTrie.test1, 34 | testProperty "unit 2" IntTrie.test2, 35 | testProperty "unit 3" IntTrie.test3, 36 | testProperty "lookups" IntTrie.prop_lookup_mono, 37 | testProperty "completions" IntTrie.prop_completions_mono, 38 | testProperty "toList" IntTrie.prop_construct_toList, 39 | testProperty "serialise" IntTrie.prop_serialise_deserialise, 40 | testProperty "size" IntTrie.prop_serialiseSize, 41 | testProperty "unfinalise" IntTrie.prop_finalise_unfinalise 42 | ] 43 | 44 | , testGroup "index" [ 45 | testProperty "lookup" Index.prop_lookup, 46 | testProperty "valid" Index.prop_valid, 47 | testProperty "toList" Index.prop_toList, 48 | testProperty "serialise" Index.prop_serialise_deserialise, 49 | testProperty "size" Index.prop_serialiseSize, 50 | testProperty "matches tar" Index.prop_index_matches_tar, 51 | testProperty "unfinalise" Index.prop_finalise_unfinalise 52 | ] 53 | ] 54 | 55 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | 0.5.0.1 Duncan Coutts January 2016 2 | 3 | * Fix compatability with directory-1.2.3+ 4 | 5 | 0.5.0.0 Duncan Coutts January 2016 6 | 7 | * Work with old version of bytestring (using bytestring-builder package). 8 | * Builds with GHC 6.10 -- 8.0. 9 | * Change type of Index.serialise to be simply strict bytestring. 10 | * Preserve file timestamps on unpack (with directory-1.2.3+) 11 | 12 | 0.4.5.0 Duncan Coutts January 2016 13 | 14 | * Revert accidental minor API change in 0.4.x series (the type of the 15 | owner and group name strings). The 0.4.3.0 and 0.4.4.0 releases 16 | contained the accidental API change. 
17 | * Add a handy foldlEntries function 18 | 19 | 0.4.4.0 Duncan Coutts January 2016 20 | 21 | * Build and warning fixes for GHC 7.10 and 8.0 22 | * New Index module function `toList` to get all index entries 23 | 24 | 0.4.3.0 Duncan Coutts January 2016 25 | 26 | * New Index function `unfinalise` to extend existing index 27 | * 9x faster reading 28 | * 9x faster index construction 29 | * 24x faster index extension 30 | * More compact entry types, using ByteStrings 31 | * More Eq and Show instances 32 | * Greater QC test coverage 33 | * Fix minor bug in reading non-standard v7 format entries 34 | 35 | 0.4.2.2 Edsko de Vries October 2015 36 | 37 | * Fix bug in Index 38 | 39 | 0.4.2.1 Duncan Coutts July 2015 40 | 41 | * Fix tests for the Index modules (the code was right) 42 | 43 | 0.4.2.0 Duncan Coutts July 2015 44 | 45 | * New Index module for random access to tar file contents 46 | * New lower level tar file I/O actions 47 | * New tarball file 'append' action 48 | 49 | 0.4.1.0 Duncan Coutts January 2015 50 | 51 | * Build with GHC 7.10 52 | * Switch from old-time to time package 53 | * Added more instance for Entries type 54 | 55 | 0.4.0.1 Duncan Coutts October 2012 56 | 57 | * fixes to work with directory 1.2 58 | * More Eq/Ord instances 59 | 60 | 0.4.0.0 Duncan Coutts February 2012 61 | 62 | * More explicit error types and error handling 63 | * Support star base-256 number format 64 | * Improved API documentation 65 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Entry.hs: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------------------------- 2 | -- | 3 | -- Module : Codec.Archive.Tar.Entry 4 | -- Copyright : (c) 2007 Bjorn Bringert, 5 | -- 2008 Andrea Vezzosi, 6 | -- 2008-2009 Duncan Coutts 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : duncan@community.haskell.org 10 | -- Portability : portable 11 | -- 12 | -- Types 
and functions to manipulate tar entries. 13 | -- 14 | -- While the "Codec.Archive.Tar" module provides only the simple high level 15 | -- API, this module provides full access to the details of tar entries. This 16 | -- lets you inspect all the meta-data, construct entries and handle error cases 17 | -- more precisely. 18 | -- 19 | -- This module uses common names and so is designed to be imported qualified: 20 | -- 21 | -- > import qualified Codec.Archive.Tar as Tar 22 | -- > import qualified Codec.Archive.Tar.Entry as Tar 23 | -- 24 | ----------------------------------------------------------------------------- 25 | module Codec.Archive.Tar.Entry ( 26 | 27 | -- * Tar entry and associated types 28 | Entry(..), 29 | --TODO: should be the following with the Entry constructor not exported, 30 | -- but haddock cannot document that properly 31 | -- see http://trac.haskell.org/haddock/ticket/3 32 | --Entry(filePath, fileMode, ownerId, groupId, fileSize, modTime, 33 | -- fileType, linkTarget, headerExt, fileContent), 34 | entryPath, 35 | EntryContent(..), 36 | Ownership(..), 37 | 38 | FileSize, 39 | Permissions, 40 | EpochTime, 41 | DevMajor, 42 | DevMinor, 43 | TypeCode, 44 | Format(..), 45 | 46 | -- * Constructing simple entry values 47 | simpleEntry, 48 | fileEntry, 49 | directoryEntry, 50 | 51 | -- * Standard file permissions 52 | -- | For maximum portability when constructing archives use only these file 53 | -- permissions. 
54 | ordinaryFilePermissions, 55 | executableFilePermissions, 56 | directoryPermissions, 57 | 58 | -- * Constructing entries from disk files 59 | packFileEntry, 60 | packDirectoryEntry, 61 | getDirectoryContentsRecursive, 62 | 63 | -- * TarPath type 64 | TarPath, 65 | toTarPath, 66 | fromTarPath, 67 | fromTarPathToPosixPath, 68 | fromTarPathToWindowsPath, 69 | 70 | -- * LinkTarget type 71 | LinkTarget, 72 | toLinkTarget, 73 | fromLinkTarget, 74 | fromLinkTargetToPosixPath, 75 | fromLinkTargetToWindowsPath, 76 | 77 | ) where 78 | 79 | import Codec.Archive.Tar.Types 80 | import Codec.Archive.Tar.Pack 81 | -------------------------------------------------------------------------------- /tar.cabal: -------------------------------------------------------------------------------- 1 | name: tar 2 | version: 0.5.0.1 3 | license: BSD3 4 | license-file: LICENSE 5 | author: Duncan Coutts 6 | Bjorn Bringert 7 | maintainer: Duncan Coutts 8 | bug-reports: https://github.com/haskell/tar/issues 9 | copyright: 2007 Bjorn Bringert 10 | 2008-2016 Duncan Coutts 11 | category: Codec 12 | synopsis: Reading, writing and manipulating ".tar" archive files. 13 | description: This library is for working with \"@.tar@\" archive files. It 14 | can read and write a range of common variations of archive 15 | format including V7, POSIX USTAR and GNU formats. 16 | . 17 | It provides support for packing and unpacking portable 18 | archives. This makes it suitable for distribution but not 19 | backup because details like file ownership and exact 20 | permissions are not preserved. 21 | . 22 | It also provides features for random access to archive 23 | content using an index. 
24 | build-type: Simple 25 | cabal-version: >=1.8 26 | extra-source-files: changelog.md 27 | tested-with: GHC==6.10.4, GHC==6.12.3, GHC==7.0.4, GHC==7.2.2, GHC==7.4.2, 28 | GHC==7.6.3, GHC==7.8.4, GHC==7.10.2, GHC==8.1 29 | 30 | source-repository head 31 | type: git 32 | location: https://github.com/haskell/tar.git 33 | 34 | flag old-time 35 | default: False 36 | 37 | flag old-bytestring 38 | default: False 39 | 40 | library 41 | build-depends: base == 4.*, 42 | filepath, 43 | directory, 44 | array, 45 | containers >= 0.2, 46 | deepseq >= 1.1 && < 1.5 47 | if flag(old-time) 48 | build-depends: directory < 1.2, old-time 49 | else 50 | build-depends: directory >= 1.2, time 51 | 52 | if flag(old-bytestring) 53 | build-depends: bytestring-builder, bytestring >= 0.9 && <0.10 54 | else 55 | build-depends: bytestring >= 0.10 56 | 57 | exposed-modules: 58 | Codec.Archive.Tar 59 | Codec.Archive.Tar.Entry 60 | Codec.Archive.Tar.Check 61 | Codec.Archive.Tar.Index 62 | Codec.Archive.Tar.Types 63 | Codec.Archive.Tar.Read 64 | Codec.Archive.Tar.Write 65 | Codec.Archive.Tar.Pack 66 | Codec.Archive.Tar.Unpack 67 | Codec.Archive.Tar.Index.StringTable 68 | Codec.Archive.Tar.Index.IntTrie 69 | 70 | other-extensions: 71 | CPP, BangPatterns, 72 | DeriveDataTypeable, ScopedTypeVariables 73 | 74 | ghc-options: -Wall -fno-warn-unused-imports 75 | 76 | test-suite properties 77 | type: exitcode-stdio-1.0 78 | build-depends: base, 79 | filepath, 80 | array, 81 | containers, 82 | deepseq, 83 | bytestring-handle, 84 | QuickCheck == 2.*, 85 | tasty >= 0.10 && <0.12, 86 | tasty-quickcheck == 0.8.* 87 | 88 | if flag(old-time) 89 | build-depends: directory < 1.2, old-time 90 | else 91 | build-depends: directory >= 1.2, time 92 | 93 | if flag(old-bytestring) 94 | build-depends: bytestring-builder, bytestring >= 0.9 && <0.10 95 | else 96 | build-depends: bytestring >= 0.10 97 | 98 | hs-source-dirs: . 
test 99 | 100 | main-is: test/Properties.hs 101 | cpp-options: -DTESTS 102 | 103 | other-modules: 104 | Codec.Archive.Tar.Index 105 | Codec.Archive.Tar.Index.StringTable 106 | Codec.Archive.Tar.Index.IntTrie 107 | 108 | other-extensions: 109 | CPP, BangPatterns, 110 | DeriveDataTypeable, ScopedTypeVariables 111 | 112 | ghc-options: -fno-ignore-asserts 113 | 114 | benchmark bench 115 | type: exitcode-stdio-1.0 116 | hs-source-dirs: . bench 117 | main-is: bench/Main.hs 118 | build-depends: base, 119 | bytestring, 120 | filepath, directory, 121 | array, 122 | containers, 123 | deepseq, 124 | old-time, time, 125 | criterion >= 1.0 126 | 127 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Unpack.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar 5 | -- Copyright : (c) 2007 Bjorn Bringert, 6 | -- 2008 Andrea Vezzosi, 7 | -- 2008-2009, 2012, 2016 Duncan Coutts 8 | -- License : BSD3 9 | -- 10 | -- Maintainer : duncan@community.haskell.org 11 | -- Portability : portable 12 | -- 13 | ----------------------------------------------------------------------------- 14 | module Codec.Archive.Tar.Unpack ( 15 | unpack, 16 | ) where 17 | 18 | import Codec.Archive.Tar.Types 19 | import Codec.Archive.Tar.Check 20 | 21 | import qualified Data.ByteString.Lazy as BS 22 | import System.FilePath 23 | ( () ) 24 | import qualified System.FilePath as FilePath.Native 25 | ( takeDirectory ) 26 | import System.Directory 27 | ( createDirectoryIfMissing, copyFile ) 28 | import Control.Exception 29 | ( Exception, throwIO ) 30 | #if MIN_VERSION_directory(1,2,3) 31 | import System.Directory 32 | ( setModificationTime ) 33 | import Data.Time.Clock.POSIX 34 | ( posixSecondsToUTCTime ) 35 | import Control.Exception as Exception 36 | ( catch ) 37 | import System.IO.Error 
38 | ( isPermissionError ) 39 | #endif 40 | 41 | 42 | -- | Create local files and directories based on the entries of a tar archive. 43 | -- 44 | -- This is a portable implementation of unpacking suitable for portable 45 | -- archives. It handles 'NormalFile' and 'Directory' entries and has simulated 46 | -- support for 'SymbolicLink' and 'HardLink' entries. Links are implemented by 47 | -- copying the target file. This therefore works on Windows as well as Unix. 48 | -- All other entry types are ignored, that is they are not unpacked and no 49 | -- exception is raised. 50 | -- 51 | -- If the 'Entries' ends in an error then it is raised an an exception. Any 52 | -- files or directories that have been unpacked before the error was 53 | -- encountered will not be deleted. For this reason you may want to unpack 54 | -- into an empty directory so that you can easily clean up if unpacking fails 55 | -- part-way. 56 | -- 57 | -- On its own, this function only checks for security (using 'checkSecurity'). 58 | -- You can do other checks by applying checking functions to the 'Entries' that 59 | -- you pass to this function. For example: 60 | -- 61 | -- > unpack dir (checkTarbomb expectedDir entries) 62 | -- 63 | -- If you care about the priority of the reported errors then you may want to 64 | -- use 'checkSecurity' before 'checkTarbomb' or other checks. 65 | -- 66 | unpack :: Exception e => FilePath -> Entries e -> IO () 67 | unpack baseDir entries = unpackEntries [] (checkSecurity entries) 68 | >>= emulateLinks 69 | 70 | where 71 | -- We're relying here on 'checkSecurity' to make sure we're not scribbling 72 | -- files all over the place. 
73 | 74 | unpackEntries _ (Fail err) = either throwIO throwIO err 75 | unpackEntries links Done = return links 76 | unpackEntries links (Next entry es) = case entryContent entry of 77 | NormalFile file _ -> extractFile path file mtime 78 | >> unpackEntries links es 79 | Directory -> extractDir path mtime 80 | >> unpackEntries links es 81 | HardLink link -> (unpackEntries $! saveLink path link links) es 82 | SymbolicLink link -> (unpackEntries $! saveLink path link links) es 83 | _ -> unpackEntries links es --ignore other file types 84 | where 85 | path = entryPath entry 86 | mtime = entryTime entry 87 | 88 | extractFile path content mtime = do 89 | -- Note that tar archives do not make sure each directory is created 90 | -- before files they contain, indeed we may have to create several 91 | -- levels of directory. 92 | createDirectoryIfMissing True absDir 93 | BS.writeFile absPath content 94 | setModTime absPath mtime 95 | where 96 | absDir = baseDir FilePath.Native.takeDirectory path 97 | absPath = baseDir path 98 | 99 | extractDir path mtime = do 100 | createDirectoryIfMissing True absPath 101 | setModTime absPath mtime 102 | where 103 | absPath = baseDir path 104 | 105 | saveLink path link links = seq (length path) 106 | $ seq (length link') 107 | $ (path, link'):links 108 | where link' = fromLinkTarget link 109 | 110 | emulateLinks = mapM_ $ \(relPath, relLinkTarget) -> 111 | let absPath = baseDir relPath 112 | absTarget = FilePath.Native.takeDirectory absPath relLinkTarget 113 | in copyFile absTarget absPath 114 | 115 | setModTime :: FilePath -> EpochTime -> IO () 116 | #if MIN_VERSION_directory(1,2,3) 117 | -- functionality only supported as of directory-1.2.3.x 118 | setModTime path t = 119 | setModificationTime path (posixSecondsToUTCTime (fromIntegral t)) 120 | `Exception.catch` \e -> 121 | if isPermissionError e then return () else throwIO e 122 | #else 123 | setModTime _path _t = return () 124 | #endif 125 | 
-------------------------------------------------------------------------------- /Codec/Archive/Tar/Write.hs: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------------------------- 2 | -- | 3 | -- Module : Codec.Archive.Tar.Write 4 | -- Copyright : (c) 2007 Bjorn Bringert, 5 | -- 2008 Andrea Vezzosi, 6 | -- 2008-2009 Duncan Coutts 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : duncan@community.haskell.org 10 | -- Portability : portable 11 | -- 12 | ----------------------------------------------------------------------------- 13 | module Codec.Archive.Tar.Write (write) where 14 | 15 | import Codec.Archive.Tar.Types 16 | 17 | import Data.Char (ord) 18 | import Data.List (foldl') 19 | import Data.Monoid (mempty) 20 | import Numeric (showOct) 21 | 22 | import qualified Data.ByteString as BS 23 | import qualified Data.ByteString.Char8 as BS.Char8 24 | import qualified Data.ByteString.Lazy as LBS 25 | import qualified Data.ByteString.Lazy.Char8 as LBS.Char8 26 | 27 | 28 | -- | Create the external representation of a tar archive by serialising a list 29 | -- of tar entries. 30 | -- 31 | -- * The conversion is done lazily. 
32 | -- 33 | write :: [Entry] -> LBS.ByteString 34 | write es = LBS.concat $ map putEntry es ++ [LBS.replicate (512*2) 0] 35 | 36 | putEntry :: Entry -> LBS.ByteString 37 | putEntry entry = case entryContent entry of 38 | NormalFile content size -> LBS.concat [ header, content, padding size ] 39 | OtherEntryType _ content size -> LBS.concat [ header, content, padding size ] 40 | _ -> header 41 | where 42 | header = putHeader entry 43 | padding size = LBS.replicate paddingSize 0 44 | where paddingSize = fromIntegral (negate size `mod` 512) 45 | 46 | putHeader :: Entry -> LBS.ByteString 47 | putHeader entry = 48 | LBS.Char8.pack 49 | $ take 148 block 50 | ++ putOct 7 checksum 51 | ++ ' ' : drop 156 block 52 | -- ++ putOct 8 checksum 53 | -- ++ drop 156 block 54 | where 55 | block = putHeaderNoChkSum entry 56 | checksum = foldl' (\x y -> x + ord y) 0 block 57 | 58 | putHeaderNoChkSum :: Entry -> String 59 | putHeaderNoChkSum Entry { 60 | entryTarPath = TarPath name prefix, 61 | entryContent = content, 62 | entryPermissions = permissions, 63 | entryOwnership = ownership, 64 | entryTime = modTime, 65 | entryFormat = format 66 | } = 67 | 68 | concat 69 | [ putBString 100 $ name 70 | , putOct 8 $ permissions 71 | , putOct 8 $ ownerId ownership 72 | , putOct 8 $ groupId ownership 73 | , putOct 12 $ contentSize 74 | , putOct 12 $ modTime 75 | , fill 8 $ ' ' -- dummy checksum 76 | , putChar8 $ typeCode 77 | , putBString 100 $ linkTarget 78 | ] ++ 79 | case format of 80 | V7Format -> 81 | fill 255 '\NUL' 82 | UstarFormat -> concat 83 | [ putBString 8 $ ustarMagic 84 | , putString 32 $ ownerName ownership 85 | , putString 32 $ groupName ownership 86 | , putOct 8 $ deviceMajor 87 | , putOct 8 $ deviceMinor 88 | , putBString 155 $ prefix 89 | , fill 12 $ '\NUL' 90 | ] 91 | GnuFormat -> concat 92 | [ putBString 8 $ gnuMagic 93 | , putString 32 $ ownerName ownership 94 | , putString 32 $ groupName ownership 95 | , putGnuDev 8 $ deviceMajor 96 | , putGnuDev 8 $ deviceMinor 97 | , 
putBString 155 $ prefix 98 | , fill 12 $ '\NUL' 99 | ] 100 | where 101 | (typeCode, contentSize, linkTarget, 102 | deviceMajor, deviceMinor) = case content of 103 | NormalFile _ size -> ('0' , size, mempty, 0, 0) 104 | Directory -> ('5' , 0, mempty, 0, 0) 105 | SymbolicLink (LinkTarget link) -> ('2' , 0, link, 0, 0) 106 | HardLink (LinkTarget link) -> ('1' , 0, link, 0, 0) 107 | CharacterDevice major minor -> ('3' , 0, mempty, major, minor) 108 | BlockDevice major minor -> ('4' , 0, mempty, major, minor) 109 | NamedPipe -> ('6' , 0, mempty, 0, 0) 110 | OtherEntryType code _ size -> (code, size, mempty, 0, 0) 111 | 112 | putGnuDev w n = case content of 113 | CharacterDevice _ _ -> putOct w n 114 | BlockDevice _ _ -> putOct w n 115 | _ -> replicate w '\NUL' 116 | 117 | ustarMagic, gnuMagic :: BS.ByteString 118 | ustarMagic = BS.Char8.pack "ustar\NUL00" 119 | gnuMagic = BS.Char8.pack "ustar \NUL" 120 | 121 | -- * TAR format primitive output 122 | 123 | type FieldWidth = Int 124 | 125 | putBString :: FieldWidth -> BS.ByteString -> String 126 | putBString n s = BS.Char8.unpack (BS.take n s) ++ fill (n - BS.length s) '\NUL' 127 | 128 | putString :: FieldWidth -> String -> String 129 | putString n s = take n s ++ fill (n - length s) '\NUL' 130 | 131 | --TODO: check integer widths, eg for large file sizes 132 | putOct :: (Integral a, Show a) => FieldWidth -> a -> String 133 | putOct n x = 134 | let octStr = take (n-1) $ showOct x "" 135 | in fill (n - length octStr - 1) '0' 136 | ++ octStr 137 | ++ putChar8 '\NUL' 138 | 139 | putChar8 :: Char -> String 140 | putChar8 c = [c] 141 | 142 | fill :: FieldWidth -> Char -> String 143 | fill n c = replicate n c 144 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This file has been generated -- see https://github.com/hvr/multi-ghc-travis 2 | language: c 3 | sudo: false 4 | 5 | cache: 6 | 
directories: 7 | - $HOME/.cabsnap 8 | - $HOME/.cabal/packages 9 | 10 | before_cache: 11 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/build-reports.log 12 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/00-index.tar 13 | 14 | matrix: 15 | include: 16 | - env: CABALVER=1.16 GHCVER=7.0.4 NOTEST=1 17 | compiler: ": #GHC 7.0.4" 18 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.0.4,zlib1g-dev], sources: [hvr-ghc]}} 19 | - env: CABALVER=1.16 GHCVER=7.0.4 NOTEST=1 CABALCONFIG=bytestring-0.9.cabal.config 20 | compiler: ": #GHC 7.0.4 bytestring-0.9" 21 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.0.4,zlib1g-dev], sources: [hvr-ghc]}} 22 | - env: CABALVER=1.16 GHCVER=7.2.2 NOTEST=1 23 | compiler: ": #GHC 7.2.2" 24 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.2.2,zlib1g-dev], sources: [hvr-ghc]}} 25 | - env: CABALVER=1.16 GHCVER=7.2.2 NOTEST=1 CABALCONFIG=bytestring-0.9.cabal.config 26 | compiler: ": #GHC 7.2.2 bytestring-0.9" 27 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.2.2,zlib1g-dev], sources: [hvr-ghc]}} 28 | - env: CABALVER=1.22 GHCVER=7.4.2 29 | compiler: ": #GHC 7.4.2" 30 | addons: {apt: {packages: [cabal-install-1.22,ghc-7.4.2,zlib1g-dev], sources: [hvr-ghc]}} 31 | - env: CABALVER=1.22 GHCVER=7.4.2 CABALCONFIG=bytestring-0.9.cabal.config 32 | compiler: ": #GHC 7.4.2 bytestring-0.9" 33 | addons: {apt: {packages: [cabal-install-1.22,ghc-7.4.2,zlib1g-dev], sources: [hvr-ghc]}} 34 | - env: CABALVER=1.16 GHCVER=7.6.3 35 | compiler: ": #GHC 7.6.3" 36 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.6.3,zlib1g-dev], sources: [hvr-ghc]}} 37 | - env: CABALVER=1.18 GHCVER=7.8.4 38 | compiler: ": #GHC 7.8.4" 39 | addons: {apt: {packages: [cabal-install-1.18,ghc-7.8.4,zlib1g-dev], sources: [hvr-ghc]}} 40 | - env: CABALVER=1.22 GHCVER=7.10.2 41 | compiler: ": #GHC 7.10.2" 42 | addons: {apt: {packages: [cabal-install-1.22,ghc-7.10.2,zlib1g-dev], sources: [hvr-ghc]}} 43 | - env: CABALVER=head GHCVER=head NOTEST=1 44 | compiler: ": 
#GHC head" 45 | addons: {apt: {packages: [cabal-install-head,ghc-head,zlib1g-dev], sources: [hvr-ghc]}} 46 | - env: CABALVER=head GHCVER=head 47 | compiler: ": #GHC head" 48 | addons: {apt: {packages: [cabal-install-head,ghc-head,zlib1g-dev], sources: [hvr-ghc]}} 49 | 50 | allow_failures: 51 | - env: CABALVER=head GHCVER=head 52 | 53 | before_install: 54 | - unset CC 55 | - export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$PATH 56 | - if [ "x$CABALCONFIG" = "x" ]; then true; else cp $CABALCONFIG cabal.config; fi 57 | - if [ "x$NOTEST" = "x" ]; then export CABFLAGS="--enable-tests"; else export CABFLAGS=""; fi 58 | 59 | install: 60 | - cabal --version 61 | - echo "$(ghc --version) [$(ghc --print-project-git-commit-id 2> /dev/null || echo '?')]" 62 | - if [ -f $HOME/.cabal/packages/hackage.haskell.org/00-index.tar.gz ]; 63 | then 64 | zcat $HOME/.cabal/packages/hackage.haskell.org/00-index.tar.gz > 65 | $HOME/.cabal/packages/hackage.haskell.org/00-index.tar; 66 | fi 67 | - travis_retry cabal update -v 68 | - sed -i 's/^jobs:/-- jobs:/' ${HOME}/.cabal/config 69 | - cabal install --only-dependencies $CABFLAGS --dry -v > installplan.txt 70 | - sed -i -e '1,/^Resolving /d' installplan.txt; cat installplan.txt 71 | 72 | # check whether current requested install-plan matches cached package-db snapshot 73 | - if diff -u installplan.txt $HOME/.cabsnap/installplan.txt; 74 | then 75 | echo "cabal build-cache HIT"; 76 | rm -rfv .ghc; 77 | cp -a $HOME/.cabsnap/ghc $HOME/.ghc; 78 | cp -a $HOME/.cabsnap/lib $HOME/.cabsnap/share $HOME/.cabsnap/bin $HOME/.cabal/; 79 | else 80 | echo "cabal build-cache MISS"; 81 | rm -rf $HOME/.cabsnap; 82 | mkdir -p $HOME/.ghc $HOME/.cabal/lib $HOME/.cabal/share $HOME/.cabal/bin; 83 | cabal install --only-dependencies $CABFLAGS; 84 | fi 85 | 86 | # snapshot package-db on cache miss 87 | - if [ ! 
-d $HOME/.cabsnap ]; 88 | then 89 | echo "snapshotting package-db to build-cache"; 90 | mkdir $HOME/.cabsnap; 91 | cp -a $HOME/.ghc $HOME/.cabsnap/ghc; 92 | cp -a $HOME/.cabal/lib $HOME/.cabal/share $HOME/.cabal/bin installplan.txt $HOME/.cabsnap/; 93 | fi 94 | 95 | # Here starts the actual work to be performed for the package under test; 96 | # any command which exits with a non-zero exit code causes the build to fail. 97 | script: 98 | - if [ -f configure.ac ]; then autoreconf -i; fi 99 | - cabal configure $CABFLAGS -v2 # -v2 provides useful information for debugging 100 | - cabal build # this builds all libraries and executables (including tests/benchmarks) 101 | - if [ "x$NOTEST" = "x" ]; then cabal test; fi 102 | - cabal sdist # tests that a source-distribution can be generated 103 | 104 | # Check that the resulting source distribution can be built & installed. 105 | # If there are no other `.tar.gz` files in `dist`, this can be even simpler: 106 | # `cabal install --force-reinstalls dist/*-*.tar.gz` 107 | - SRC_TGZ=$(cabal info . 
| awk '{print $2;exit}').tar.gz && 108 | (cd dist && cabal install --force-reinstalls "$SRC_TGZ") 109 | 110 | # EOF 111 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Pack.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar 5 | -- Copyright : (c) 2007 Bjorn Bringert, 6 | -- 2008 Andrea Vezzosi, 7 | -- 2008-2009, 2012, 2016 Duncan Coutts 8 | -- License : BSD3 9 | -- 10 | -- Maintainer : duncan@community.haskell.org 11 | -- Portability : portable 12 | -- 13 | ----------------------------------------------------------------------------- 14 | module Codec.Archive.Tar.Pack ( 15 | pack, 16 | packFileEntry, 17 | packDirectoryEntry, 18 | 19 | getDirectoryContentsRecursive, 20 | ) where 21 | 22 | import Codec.Archive.Tar.Types 23 | 24 | import qualified Data.ByteString.Lazy as BS 25 | import System.FilePath 26 | ( () ) 27 | import qualified System.FilePath as FilePath.Native 28 | ( addTrailingPathSeparator, hasTrailingPathSeparator ) 29 | import System.Directory 30 | ( getDirectoryContents, doesDirectoryExist, getModificationTime 31 | , Permissions(..), getPermissions ) 32 | #if MIN_VERSION_directory(1,2,0) 33 | -- The directory package switched to the new time package 34 | import Data.Time.Clock 35 | ( UTCTime ) 36 | import Data.Time.Clock.POSIX 37 | ( utcTimeToPOSIXSeconds ) 38 | #else 39 | import System.Time 40 | ( ClockTime(..) ) 41 | #endif 42 | import System.IO 43 | ( IOMode(ReadMode), openBinaryFile, hFileSize ) 44 | import System.IO.Unsafe (unsafeInterleaveIO) 45 | 46 | -- | Creates a tar archive from a list of directory or files. Any directories 47 | -- specified will have their contents included recursively. Paths in the 48 | -- archive will be relative to the given base directory. 
49 | -- 50 | -- This is a portable implementation of packing suitable for portable archives. 51 | -- In particular it only constructs 'NormalFile' and 'Directory' entries. Hard 52 | -- links and symbolic links are treated like ordinary files. It cannot be used 53 | -- to pack directories containing recursive symbolic links. Special files like 54 | -- FIFOs (named pipes), sockets or device files will also cause problems. 55 | -- 56 | -- An exception will be thrown for any file names that are too long to 57 | -- represent as a 'TarPath'. 58 | -- 59 | -- * This function returns results lazily. Subdirectories are scanned 60 | -- and files are read one by one as the list of entries is consumed. 61 | -- 62 | pack :: FilePath -- ^ Base directory 63 | -> [FilePath] -- ^ Files and directories to pack, relative to the base dir 64 | -> IO [Entry] 65 | pack baseDir paths0 = preparePaths baseDir paths0 >>= packPaths baseDir 66 | 67 | preparePaths :: FilePath -> [FilePath] -> IO [FilePath] 68 | preparePaths baseDir paths = 69 | fmap concat $ interleave 70 | [ do isDir <- doesDirectoryExist (baseDir path) 71 | if isDir 72 | then do entries <- getDirectoryContentsRecursive (baseDir path) 73 | let entries' = map (path ) entries 74 | dir = FilePath.Native.addTrailingPathSeparator path 75 | if null path then return entries' 76 | else return (dir : entries') 77 | else return [path] 78 | | path <- paths ] 79 | 80 | packPaths :: FilePath -> [FilePath] -> IO [Entry] 81 | packPaths baseDir paths = 82 | interleave 83 | [ do tarpath <- either fail return (toTarPath isDir relpath) 84 | if isDir then packDirectoryEntry filepath tarpath 85 | else packFileEntry filepath tarpath 86 | | relpath <- paths 87 | , let isDir = FilePath.Native.hasTrailingPathSeparator filepath 88 | filepath = baseDir relpath ] 89 | 90 | interleave :: [IO a] -> IO [a] 91 | interleave = unsafeInterleaveIO . 
go 92 | where 93 | go [] = return [] 94 | go (x:xs) = do 95 | x' <- x 96 | xs' <- interleave xs 97 | return (x':xs') 98 | 99 | -- | Construct a tar 'Entry' based on a local file. 100 | -- 101 | -- This sets the entry size, the data contained in the file and the file's 102 | -- modification time. If the file is executable then that information is also 103 | -- preserved. File ownership and detailed permissions are not preserved. 104 | -- 105 | -- * The file contents is read lazily. 106 | -- 107 | packFileEntry :: FilePath -- ^ Full path to find the file on the local disk 108 | -> TarPath -- ^ Path to use for the tar Entry in the archive 109 | -> IO Entry 110 | packFileEntry filepath tarpath = do 111 | mtime <- getModTime filepath 112 | perms <- getPermissions filepath 113 | file <- openBinaryFile filepath ReadMode 114 | size <- hFileSize file 115 | content <- BS.hGetContents file 116 | return (simpleEntry tarpath (NormalFile content (fromIntegral size))) { 117 | entryPermissions = if executable perms then executableFilePermissions 118 | else ordinaryFilePermissions, 119 | entryTime = mtime 120 | } 121 | 122 | -- | Construct a tar 'Entry' based on a local directory (but not its contents). 123 | -- 124 | -- The only attribute of the directory that is used is its modification time. 125 | -- Directory ownership and detailed permissions are not preserved. 126 | -- 127 | packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk 128 | -> TarPath -- ^ Path to use for the tar Entry in the archive 129 | -> IO Entry 130 | packDirectoryEntry filepath tarpath = do 131 | mtime <- getModTime filepath 132 | return (directoryEntry tarpath) { 133 | entryTime = mtime 134 | } 135 | 136 | -- | This is a utility function, much like 'getDirectoryContents'. The 137 | -- difference is that it includes the contents of subdirectories. 138 | -- 139 | -- The paths returned are all relative to the top directory. 
Directory paths 140 | -- are distinguishable by having a trailing path separator 141 | -- (see 'FilePath.Native.hasTrailingPathSeparator'). 142 | -- 143 | -- All directories are listed before the files that they contain. Amongst the 144 | -- contents of a directory, subdirectories are listed after normal files. The 145 | -- overall result is that files within a directory will be together in a single 146 | -- contiguous group. This tends to improve file layout and IO performance when 147 | -- creating or extracting tar archives. 148 | -- 149 | -- * This function returns results lazily. Subdirectories are not scanned 150 | -- until the files entries in the parent directory have been consumed. 151 | -- 152 | getDirectoryContentsRecursive :: FilePath -> IO [FilePath] 153 | getDirectoryContentsRecursive dir0 = 154 | fmap tail (recurseDirectories dir0 [""]) 155 | 156 | recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath] 157 | recurseDirectories _ [] = return [] 158 | recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do 159 | (files, dirs') <- collect [] [] =<< getDirectoryContents (base dir) 160 | 161 | files' <- recurseDirectories base (dirs' ++ dirs) 162 | return (dir : files ++ files') 163 | 164 | where 165 | collect files dirs' [] = return (reverse files, reverse dirs') 166 | collect files dirs' (entry:entries) | ignore entry 167 | = collect files dirs' entries 168 | collect files dirs' (entry:entries) = do 169 | let dirEntry = dir entry 170 | dirEntry' = FilePath.Native.addTrailingPathSeparator dirEntry 171 | isDirectory <- doesDirectoryExist (base dirEntry) 172 | if isDirectory 173 | then collect files (dirEntry':dirs') entries 174 | else collect (dirEntry:files) dirs' entries 175 | 176 | ignore ['.'] = True 177 | ignore ['.', '.'] = True 178 | ignore _ = False 179 | 180 | getModTime :: FilePath -> IO EpochTime 181 | getModTime path = do 182 | #if MIN_VERSION_directory(1,2,0) 183 | -- The directory package switched to the new time package 184 | t 
<- getModificationTime path 185 | return . floor . utcTimeToPOSIXSeconds $ t 186 | #else 187 | (TOD s _) <- getModificationTime path 188 | return $! fromIntegral s 189 | #endif 190 | -------------------------------------------------------------------------------- /htar/htar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module Main where 3 | 4 | import qualified Codec.Archive.Tar as Tar 5 | import qualified Codec.Archive.Tar.Entry as Tar 6 | 7 | import qualified Codec.Compression.GZip as GZip (compress, decompress) 8 | import qualified Codec.Compression.BZip as BZip (compress, decompress) 9 | 10 | import Control.Exception (throwIO) 11 | import qualified Data.ByteString.Lazy as BS 12 | import Data.ByteString.Lazy (ByteString) 13 | import Data.Bits (testBit) 14 | import Data.Char (toUpper) 15 | import System.Console.GetOpt (OptDescr(..), ArgDescr(..), ArgOrder(..), 16 | getOpt', usageInfo) 17 | import System.Environment (getArgs) 18 | import System.Exit (exitFailure) 19 | import System.IO (hPutStrLn, stderr) 20 | import Data.Time (formatTime) 21 | import Data.Time.Clock.POSIX (posixSecondsToUTCTime) 22 | #if MIN_VERSION_time(1,5,0) 23 | import Data.Time (defaultTimeLocale) 24 | #else 25 | import System.Locale (defaultTimeLocale) 26 | #endif 27 | 28 | main :: IO () 29 | main = do 30 | (opts, files) <- parseOptions =<< getArgs 31 | main' opts files 32 | 33 | main' :: Options -> [FilePath] -> IO () 34 | main' (Options { optFile = file, 35 | optDir = dir, 36 | optAction = action, 37 | optCompression = compression, 38 | optVerbosity = verbosity }) files = 39 | case action of 40 | NoAction -> die ["No action given. Specify one of -c, -t or -x."] 41 | Help -> printUsage 42 | Create -> output . compress compression 43 | . Tar.write =<< Tar.pack dir files 44 | Extract -> Tar.unpack dir . Tar.read . decompress compression =<< input 45 | List -> printEntries . Tar.read . 
decompress compression =<< input 46 | Append | compression /= None 47 | -> die ["Append cannot be used together with compression."] 48 | | file == "-" 49 | -> die ["Append must be used on a file, not stdin/stdout."] 50 | | otherwise 51 | -> Tar.append file dir files 52 | where 53 | input = if file == "-" then BS.getContents else BS.readFile file 54 | output = if file == "-" then BS.putStr else BS.writeFile file 55 | 56 | printEntries = Tar.foldEntries (\entry rest -> printEntry entry >> rest) 57 | (return ()) throwIO 58 | printEntry = putStrLn . entryInfo verbosity 59 | 60 | data Compression = None | GZip | BZip 61 | deriving (Show, Eq) 62 | 63 | compress :: Compression -> ByteString -> ByteString 64 | compress None = id 65 | compress GZip = GZip.compress 66 | compress BZip = BZip.compress 67 | 68 | decompress :: Compression -> ByteString -> ByteString 69 | decompress None = id 70 | decompress GZip = GZip.decompress 71 | decompress BZip = BZip.decompress 72 | 73 | data Verbosity = Verbose | Concise 74 | 75 | ------------------------ 76 | -- List archive contents 77 | 78 | entryInfo :: Verbosity -> Tar.Entry -> String 79 | entryInfo Verbose = detailedInfo 80 | entryInfo Concise = Tar.entryPath 81 | 82 | detailedInfo :: Tar.Entry -> String 83 | detailedInfo entry = 84 | unwords [ typeCode : permissions 85 | , justify 19 (owner ++ '/' : group) size 86 | , time 87 | , name ++ link ] 88 | where 89 | typeCode = case Tar.entryContent entry of 90 | Tar.HardLink _ -> 'h' 91 | Tar.SymbolicLink _ -> 'l' 92 | Tar.CharacterDevice _ _ -> 'c' 93 | Tar.BlockDevice _ _ -> 'b' 94 | Tar.Directory -> 'd' 95 | Tar.NamedPipe -> 'p' 96 | _ -> '-' 97 | permissions = concat [userPerms, groupPerms, otherPerms] 98 | where 99 | userPerms = formatPerms 8 7 6 11 's' 100 | groupPerms = formatPerms 5 4 3 10 's' 101 | otherPerms = formatPerms 2 1 0 9 't' 102 | formatPerms r w x s c = 103 | [if testBit m r then 'r' else '-' 104 | ,if testBit m w then 'w' else '-' 105 | ,if testBit m s 106 | then if 
testBit m x then c else toUpper c 107 | else if testBit m x then 'x' else '-'] 108 | m = Tar.entryPermissions entry 109 | owner = nameOrID ownerName ownerId 110 | group = nameOrID groupName groupId 111 | (Tar.Ownership ownerName groupName ownerId groupId) = 112 | Tar.entryOwnership entry 113 | nameOrID n i = if null n then show i else n 114 | size = case Tar.entryContent entry of 115 | Tar.NormalFile _ fileSize -> show fileSize 116 | _ -> "0" 117 | 118 | time = formatEpochTime "%Y-%m-%d %H:%M" (Tar.entryTime entry) 119 | name = Tar.entryPath entry 120 | link = case Tar.entryContent entry of 121 | Tar.HardLink l -> " link to " ++ Tar.fromLinkTarget l 122 | Tar.SymbolicLink l -> " -> " ++ Tar.fromLinkTarget l 123 | _ -> "" 124 | 125 | justify :: Int -> String -> String -> String 126 | justify width left right = left ++ padding ++ right 127 | where 128 | padding = replicate padWidth ' ' 129 | padWidth = max 1 (width - length left - length right) 130 | 131 | formatEpochTime :: String -> Tar.EpochTime -> String 132 | formatEpochTime f = 133 | formatTime defaultTimeLocale f . posixSecondsToUTCTime . fromIntegral 134 | 135 | ------------------------ 136 | -- Command line handling 137 | 138 | data Options = Options { 139 | optFile :: FilePath, -- "-" means stdin/stdout 140 | optDir :: FilePath, 141 | optAction :: Action, 142 | optCompression :: Compression, 143 | optVerbosity :: Verbosity 144 | } 145 | 146 | defaultOptions :: Options 147 | defaultOptions = Options { 148 | optFile = "-", 149 | optDir = "", 150 | optAction = NoAction, 151 | optCompression = None, 152 | optVerbosity = Concise 153 | } 154 | 155 | data Action = NoAction 156 | | Help 157 | | Create 158 | | Extract 159 | | List 160 | | Append 161 | deriving Show 162 | 163 | optDescr :: [OptDescr (Options -> Options)] 164 | optDescr = 165 | [ Option ['c'] ["create"] 166 | (action Create) 167 | "Create a new archive." 
168 | , Option ['x'] ["extract", "get"] 169 | (action Extract) 170 | "Extract files from an archive." 171 | , Option ['t'] ["list"] 172 | (action List) 173 | "List the contents of an archive." 174 | , Option ['r'] ["append"] 175 | (action Append) 176 | "Append files to the end of an archive." 177 | , Option ['f'] ["file"] 178 | (ReqArg (\f o -> o { optFile = f}) "ARCHIVE") 179 | "Use archive file ARCHIVE." 180 | , Option ['C'] ["directory"] 181 | (ReqArg (\d o -> o { optDir = d }) "DIR") 182 | "Create or extract relative to DIR." 183 | , Option ['z'] ["gzip", "gunzip", "ungzip"] 184 | (compression GZip) 185 | "Use gzip compression." 186 | , Option ['j'] ["bzip2"] 187 | (compression BZip) 188 | "Use bzip2 compression." 189 | , Option ['v'] ["verbose"] 190 | (NoArg (\o -> o { optVerbosity = Verbose })) 191 | "Verbosely list files processed." 192 | , Option ['h', '?'] ["help"] 193 | (action Help) 194 | "Print this help output." 195 | ] 196 | where 197 | action a = NoArg (\o -> o { optAction = a }) 198 | compression c = NoArg (\o -> o { optCompression = c }) 199 | 200 | printUsage :: IO () 201 | printUsage = putStrLn (usageInfo headder optDescr) 202 | where 203 | headder = unlines ["htar creates and extracts TAR archives.", 204 | "", 205 | "Usage: htar [OPTION ...] [FILE ...]"] 206 | 207 | parseOptions :: [String] -> IO (Options, [FilePath]) 208 | parseOptions args = 209 | let (fs, files, nonopts, errors) = getOpt' Permute optDescr args 210 | in case (nonopts, errors) of 211 | ([], []) -> return $ (foldl (flip ($)) defaultOptions fs, files) 212 | (_ , (_:_)) -> die errors 213 | (_ , _) -> die (map (("unrecognized option "++).show) nonopts) 214 | 215 | die :: [String] -> IO a 216 | die errs = do 217 | mapM_ (\e -> hPutStrLn stderr $ "htar: " ++ e) $ errs 218 | hPutStrLn stderr "Try `htar --help' for more information." 
219 | exitFailure 220 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Check.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar 5 | -- Copyright : (c) 2008-2012 Duncan Coutts 6 | -- 2011 Max Bolingbroke 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : duncan@community.haskell.org 10 | -- Portability : portable 11 | -- 12 | -- Perform various checks on tar file entries. 13 | -- 14 | ----------------------------------------------------------------------------- 15 | module Codec.Archive.Tar.Check ( 16 | 17 | -- * Security 18 | checkSecurity, 19 | FileNameError(..), 20 | 21 | -- * Tarbombs 22 | checkTarbomb, 23 | TarBombError(..), 24 | 25 | -- * Portability 26 | checkPortability, 27 | PortabilityError(..), 28 | PortabilityPlatform, 29 | ) where 30 | 31 | import Codec.Archive.Tar.Types 32 | 33 | import Data.Typeable (Typeable) 34 | import Control.Exception (Exception) 35 | import Control.Monad (MonadPlus(mplus)) 36 | import qualified System.FilePath as FilePath.Native 37 | ( splitDirectories, isAbsolute, isValid ) 38 | 39 | import qualified System.FilePath.Windows as FilePath.Windows 40 | import qualified System.FilePath.Posix as FilePath.Posix 41 | 42 | 43 | -------------------------- 44 | -- Security 45 | -- 46 | 47 | -- | This function checks a sequence of tar entries for file name security 48 | -- problems. It checks that: 49 | -- 50 | -- * file paths are not absolute 51 | -- 52 | -- * file paths do not contain any path components that are \"@..@\" 53 | -- 54 | -- * file names are valid 55 | -- 56 | -- These checks are from the perspective of the current OS. That means we check 57 | -- for \"@C:\blah@\" files on Windows and \"\/blah\" files on Unix. 
For archive 58 | -- entry types 'HardLink' and 'SymbolicLink' the same checks are done for the 59 | -- link target. A failure in any entry terminates the sequence of entries with 60 | -- an error. 61 | -- 62 | checkSecurity :: Entries e -> Entries (Either e FileNameError) 63 | checkSecurity = checkEntries checkEntrySecurity 64 | 65 | checkEntrySecurity :: Entry -> Maybe FileNameError 66 | checkEntrySecurity entry = case entryContent entry of 67 | HardLink link -> check (entryPath entry) 68 | `mplus` check (fromLinkTarget link) 69 | SymbolicLink link -> check (entryPath entry) 70 | `mplus` check (fromLinkTarget link) 71 | _ -> check (entryPath entry) 72 | 73 | where 74 | check name 75 | | FilePath.Native.isAbsolute name 76 | = Just $ AbsoluteFileName name 77 | 78 | | not (FilePath.Native.isValid name) 79 | = Just $ InvalidFileName name 80 | 81 | | any (=="..") (FilePath.Native.splitDirectories name) 82 | = Just $ InvalidFileName name 83 | 84 | | otherwise = Nothing 85 | 86 | -- | Errors arising from tar file names being in some way invalid or dangerous 87 | data FileNameError 88 | = InvalidFileName FilePath 89 | | AbsoluteFileName FilePath 90 | deriving (Typeable) 91 | 92 | instance Show FileNameError where 93 | show = showFileNameError Nothing 94 | 95 | instance Exception FileNameError 96 | 97 | showFileNameError :: Maybe PortabilityPlatform -> FileNameError -> String 98 | showFileNameError mb_plat err = case err of 99 | InvalidFileName path -> "Invalid" ++ plat ++ " file name in tar archive: " ++ show path 100 | AbsoluteFileName path -> "Absolute" ++ plat ++ " file name in tar archive: " ++ show path 101 | where plat = maybe "" (' ':) mb_plat 102 | 103 | 104 | -------------------------- 105 | -- Tarbombs 106 | -- 107 | 108 | -- | This function checks a sequence of tar entries for being a \"tar bomb\". 109 | -- This means that the tar file does not follow the standard convention that 110 | -- all entries are within a single subdirectory, e.g. 
a file \"foo.tar\" would 111 | -- usually have all entries within the \"foo/\" subdirectory. 112 | -- 113 | -- Given the expected subdirectory, this function checks all entries are within 114 | -- that subdirectroy. 115 | -- 116 | -- Note: This check must be used in conjunction with 'checkSecurity' 117 | -- (or 'checkPortability'). 118 | -- 119 | checkTarbomb :: FilePath -> Entries e -> Entries (Either e TarBombError) 120 | checkTarbomb expectedTopDir = checkEntries (checkEntryTarbomb expectedTopDir) 121 | 122 | checkEntryTarbomb :: FilePath -> Entry -> Maybe TarBombError 123 | checkEntryTarbomb expectedTopDir entry = 124 | case FilePath.Native.splitDirectories (entryPath entry) of 125 | (topDir:_) | topDir == expectedTopDir -> Nothing 126 | _ -> Just $ TarBombError expectedTopDir 127 | 128 | -- | An error that occurs if a tar file is a \"tar bomb\" that would extract 129 | -- files outside of the intended directory. 130 | data TarBombError = TarBombError FilePath 131 | deriving (Typeable) 132 | 133 | instance Exception TarBombError 134 | 135 | instance Show TarBombError where 136 | show (TarBombError expectedTopDir) 137 | = "File in tar archive is not in the expected directory " ++ show expectedTopDir 138 | 139 | 140 | -------------------------- 141 | -- Portability 142 | -- 143 | 144 | -- | This function checks a sequence of tar entries for a number of portability 145 | -- issues. It will complain if: 146 | -- 147 | -- * The old \"Unix V7\" or \"gnu\" formats are used. For maximum portability 148 | -- only the POSIX standard \"ustar\" format should be used. 149 | -- 150 | -- * A non-portable entry type is used. Only ordinary files, hard links, 151 | -- symlinks and directories are portable. Device files, pipes and others are 152 | -- not portable between all common operating systems. 153 | -- 154 | -- * Non-ASCII characters are used in file names. 
There is no agreed portable 155 | -- convention for Unicode or other extended character sets in file names in 156 | -- tar archives. 157 | -- 158 | -- * File names that would not be portable to both Unix and Windows. This check 159 | -- includes characters that are valid in both systems and the \'/\' vs \'\\\' 160 | -- directory separator conventions. 161 | -- 162 | checkPortability :: Entries e -> Entries (Either e PortabilityError) 163 | checkPortability = checkEntries checkEntryPortability 164 | 165 | checkEntryPortability :: Entry -> Maybe PortabilityError 166 | checkEntryPortability entry 167 | | entryFormat entry `elem` [V7Format, GnuFormat] 168 | = Just $ NonPortableFormat (entryFormat entry) 169 | 170 | | not (portableFileType (entryContent entry)) 171 | = Just NonPortableFileType 172 | 173 | | not (all portableChar posixPath) 174 | = Just $ NonPortableEntryNameChar posixPath 175 | 176 | | not (FilePath.Posix.isValid posixPath) 177 | = Just $ NonPortableFileName "unix" (InvalidFileName posixPath) 178 | | not (FilePath.Windows.isValid windowsPath) 179 | = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath) 180 | 181 | | FilePath.Posix.isAbsolute posixPath 182 | = Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath) 183 | | FilePath.Windows.isAbsolute windowsPath 184 | = Just $ NonPortableFileName "windows" (AbsoluteFileName windowsPath) 185 | 186 | | any (=="..") (FilePath.Posix.splitDirectories posixPath) 187 | = Just $ NonPortableFileName "unix" (InvalidFileName posixPath) 188 | | any (=="..") (FilePath.Windows.splitDirectories windowsPath) 189 | = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath) 190 | 191 | | otherwise = Nothing 192 | 193 | where 194 | tarPath = entryTarPath entry 195 | posixPath = fromTarPathToPosixPath tarPath 196 | windowsPath = fromTarPathToWindowsPath tarPath 197 | 198 | portableFileType ftype = case ftype of 199 | NormalFile {} -> True 200 | HardLink {} -> True 201 | SymbolicLink {} -> True 
202 | Directory -> True 203 | _ -> False 204 | 205 | portableChar c = c <= '\127' 206 | 207 | -- | Portability problems in a tar archive 208 | data PortabilityError 209 | = NonPortableFormat Format 210 | | NonPortableFileType 211 | | NonPortableEntryNameChar FilePath 212 | | NonPortableFileName PortabilityPlatform FileNameError 213 | deriving (Typeable) 214 | 215 | -- | The name of a platform that portability issues arise from 216 | type PortabilityPlatform = String 217 | 218 | instance Exception PortabilityError 219 | 220 | instance Show PortabilityError where 221 | show (NonPortableFormat format) = "Archive is in the " ++ fmt ++ " format" 222 | where fmt = case format of V7Format -> "old Unix V7 tar" 223 | UstarFormat -> "ustar" -- I never generate this but a user might 224 | GnuFormat -> "GNU tar" 225 | show NonPortableFileType = "Non-portable file type in archive" 226 | show (NonPortableEntryNameChar posixPath) 227 | = "Non-portable character in archive entry name: " ++ show posixPath 228 | show (NonPortableFileName platform err) 229 | = showFileNameError (Just platform) err 230 | 231 | 232 | -------------------------- 233 | -- Utils 234 | -- 235 | 236 | checkEntries :: (Entry -> Maybe e') -> Entries e -> Entries (Either e e') 237 | checkEntries checkEntry = 238 | mapEntries (\entry -> maybe (Right entry) Left (checkEntry entry)) 239 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Read.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, DeriveDataTypeable, BangPatterns #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar.Read 5 | -- Copyright : (c) 2007 Bjorn Bringert, 6 | -- 2008 Andrea Vezzosi, 7 | -- 2008-2009 Duncan Coutts, 8 | -- 2011 Max Bolingbroke 9 | -- License : BSD3 10 | -- 11 | -- Maintainer : duncan@community.haskell.org 12 | -- Portability : portable 13 | -- 14 | 
----------------------------------------------------------------------------- 15 | module Codec.Archive.Tar.Read (read, FormatError(..)) where 16 | 17 | import Codec.Archive.Tar.Types 18 | 19 | import Data.Char (ord) 20 | import Data.Int (Int64) 21 | import Data.Bits (Bits(shiftL)) 22 | import Control.Exception (Exception(..)) 23 | import Data.Typeable (Typeable) 24 | import Control.Applicative 25 | import Control.Monad 26 | import Control.DeepSeq 27 | 28 | import qualified Data.ByteString as BS 29 | import qualified Data.ByteString.Char8 as BS.Char8 30 | import qualified Data.ByteString.Unsafe as BS 31 | import qualified Data.ByteString.Lazy as LBS 32 | 33 | import Prelude hiding (read) 34 | 35 | #if !MIN_VERSION_bytestring(0,10,0) 36 | import Data.Monoid (Monoid(..)) 37 | import qualified Data.ByteString.Lazy.Internal as LBS 38 | #endif 39 | 40 | -- | Errors that can be encountered when parsing a Tar archive. 41 | data FormatError 42 | = TruncatedArchive 43 | | ShortTrailer 44 | | BadTrailer 45 | | TrailingJunk 46 | | ChecksumIncorrect 47 | | NotTarFormat 48 | | UnrecognisedTarFormat 49 | | HeaderBadNumericEncoding 50 | #if MIN_VERSION_base(4,8,0) 51 | deriving (Eq, Show, Typeable) 52 | 53 | instance Exception FormatError where 54 | displayException TruncatedArchive = "truncated tar archive" 55 | displayException ShortTrailer = "short tar trailer" 56 | displayException BadTrailer = "bad tar trailer" 57 | displayException TrailingJunk = "tar file has trailing junk" 58 | displayException ChecksumIncorrect = "tar checksum error" 59 | displayException NotTarFormat = "data is not in tar format" 60 | displayException UnrecognisedTarFormat = "tar entry not in a recognised format" 61 | displayException HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)" 62 | #else 63 | deriving (Eq, Typeable) 64 | 65 | instance Show FormatError where 66 | show TruncatedArchive = "truncated tar archive" 67 | show ShortTrailer = "short tar trailer" 68 | show 
BadTrailer = "bad tar trailer" 69 | show TrailingJunk = "tar file has trailing junk" 70 | show ChecksumIncorrect = "tar checksum error" 71 | show NotTarFormat = "data is not in tar format" 72 | show UnrecognisedTarFormat = "tar entry not in a recognised format" 73 | show HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)" 74 | 75 | instance Exception FormatError 76 | #endif 77 | 78 | instance NFData FormatError where 79 | rnf !_ = () -- enumerations are fully strict by construction 80 | 81 | -- | Convert a data stream in the tar file format into an internal data 82 | -- structure. Decoding errors are reported by the 'Fail' constructor of the 83 | -- 'Entries' type. 84 | -- 85 | -- * The conversion is done lazily. 86 | -- 87 | read :: LBS.ByteString -> Entries FormatError 88 | read = unfoldEntries getEntry 89 | 90 | getEntry :: LBS.ByteString -> Either FormatError (Maybe (Entry, LBS.ByteString)) 91 | getEntry bs 92 | | BS.length header < 512 = Left TruncatedArchive 93 | 94 | -- Tar files end with at least two blocks of all '0'. Checking this serves 95 | -- two purposes. It checks the format but also forces the tail of the data 96 | -- which is necessary to close the file if it came from a lazily read file. 
  -- A tar archive ends with two 512-byte blocks of NULs, so a NUL in the
  -- first byte of a header block signals the end-of-archive trailer.
  | LBS.head bs == 0 = case LBS.splitAt 1024 bs of
      (end, trailing)
        | LBS.length end /= 1024        -> Left ShortTrailer
        | not (LBS.all (== 0) end)      -> Left BadTrailer
        | not (LBS.all (== 0) trailing) -> Left TrailingJunk
        | otherwise                     -> Right Nothing

  | otherwise  = partial $ do

  -- If the checksum field parsed but does not match, the archive is corrupt;
  -- if neither the checksum nor the format is recognisable, it is most
  -- likely not a tar file at all.
  case (chksum_, format_) of
    (Ok chksum, _   ) | correctChecksum header chksum -> return ()
    (Ok _,      Ok _) -> Error ChecksumIncorrect
    _                 -> Error NotTarFormat

  -- These fields are partial, have to check them
  format   <- format_;   mode     <- mode_;
  uid      <- uid_;      gid      <- gid_;
  size     <- size_;     mtime    <- mtime_;
  devmajor <- devmajor_; devminor <- devminor_;

  let content = LBS.take size (LBS.drop 512 bs)
      -- entry data is NUL-padded up to a whole number of 512-byte blocks
      padding = (512 - size) `mod` 512
      bs'     = LBS.drop (512 + size + padding) bs

      entry = Entry {
        entryTarPath     = TarPath name prefix,
        entryContent     = case typecode of
                   '\0' -> NormalFile      content size
                   '0'  -> NormalFile      content size
                   '1'  -> HardLink        (LinkTarget linkname)
                   '2'  -> SymbolicLink    (LinkTarget linkname)
                   -- NOTE: this guard is deliberately placed before the
                   -- '3'..'7' cases: the old V7 format does not define those
                   -- typecodes, so for V7 archives they are passed through
                   -- as 'OtherEntryType'.
                   _ | format == V7Format
                        -> OtherEntryType  typecode content size
                   '3'  -> CharacterDevice devmajor devminor
                   '4'  -> BlockDevice     devmajor devminor
                   '5'  -> Directory
                   '6'  -> NamedPipe
                   '7'  -> NormalFile      content size
                   _    -> OtherEntryType  typecode content size,
        entryPermissions = mode,
        entryOwnership   = Ownership (BS.Char8.unpack uname)
                                     (BS.Char8.unpack gname) uid gid,
        entryTime        = mtime,
        entryFormat      = format
      }

  return (Just (entry, bs'))

  where
    -- The header is exactly the first 512 bytes, converted to a strict
    -- ByteString so the field accessors below can index into it cheaply.
#if MIN_VERSION_bytestring(0,10,0)
    header = LBS.toStrict (LBS.take 512 bs)
#else
    header = toStrict (LBS.take 512 bs)
    toStrict = LBS.foldrChunks mappend mempty
#endif

    -- Field offsets and widths follow the POSIX ustar header layout.
    name       = getString   0 100 header
    mode_      = getOct    100   8 header
    uid_       = getOct    108   8 header
    gid_       = getOct    116   8 header
    size_      = getOct    124  12 header
    mtime_     = getOct    136  12 header
    chksum_    = getOct    148   8 header
    typecode   = getByte   156     header
    linkname   = getString 157 100 header
    magic      = getChars  257   8 header
    uname      = getString 265  32 header
    gname      = getString 297  32 header
    devmajor_  = getOct    329   8 header
    devminor_  = getOct    337   8 header
    prefix     = getString 345 155 header
--  trailing   = getBytes  500  12 header

    -- The magic field distinguishes the three supported header variants.
    format_
      | magic == ustarMagic = return UstarFormat
      | magic == gnuMagic   = return GnuFormat
      | magic == v7Magic    = return V7Format
      | otherwise           = Error UnrecognisedTarFormat

v7Magic, ustarMagic, gnuMagic :: BS.ByteString
v7Magic    = BS.Char8.pack "\0\0\0\0\0\0\0\0"
ustarMagic = BS.Char8.pack "ustar\NUL00"
gnuMagic   = BS.Char8.pack "ustar \NUL"

-- | Verify the simple additive checksum of a 512-byte header block.
correctChecksum :: BS.ByteString -> Int -> Bool
correctChecksum header checksum = checksum == checksum'
  where
    -- sum of all 512 bytes in the header block,
    -- treating each byte as an 8-bit unsigned value
    sumchars  = BS.foldl' (\x y -> x + fromIntegral y) 0
    -- treating the 8 bytes of chksum as blank characters.
    checksum' = sumchars (BS.take 148 header)
              + 256 -- 256 = sumchars (BS.Char8.replicate 8 ' ')
              + sumchars (BS.drop 156 header)

-- * TAR format primitive input

-- | Read a numeric header field: leading spaces are skipped and the value
-- ends at the first NUL or space.
{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int   #-}
{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int64 #-}
getOct :: (Integral a, Bits a) => Int -> Int -> BS.ByteString -> Partial FormatError a
getOct off len = parseOct
               . BS.Char8.takeWhile (\c -> c /= '\NUL' && c /= ' ')
               . BS.Char8.dropWhile (== ' ')
               . getBytes off len
  where
    parseOct s | BS.null s = return 0
    -- As a star extension, octal fields can hold a base-256 value if the high
    -- bit of the initial character is set. The initial character can be:
    --   0x80 ==> trailing characters hold a positive base-256 value
    --   0xFF ==> trailing characters hold a negative base-256 value
    --
    -- In both cases, there won't be a trailing NUL/space.
    --
    -- GNU tar seems to contain a half-implementation of code that deals with
    -- extra bits in the first character, but I don't think it works and the
    -- docs I can find on star seem to suggest that these will always be 0,
    -- which is what I will assume.
    parseOct s | BS.head s == 128 = return (readBytes (BS.tail s))
               | BS.head s == 255 = return (negate (readBytes (BS.tail s)))
    parseOct s = case readOct s of
      Just x  -> return x
      Nothing -> Error HeaderBadNumericEncoding

    -- big-endian base-256 interpretation of the remaining bytes
    readBytes :: (Integral a, Bits a) => BS.ByteString -> a
    readBytes = BS.foldl' (\acc x -> acc `shiftL` 8 + fromIntegral x) 0

-- | A fixed-width field of raw bytes.
getBytes :: Int -> Int -> BS.ByteString -> BS.ByteString
getBytes off len = BS.take len . BS.drop off

-- | A single byte field, viewed as a 'Char'.
getByte :: Int -> BS.ByteString -> Char
getByte off bs = BS.Char8.index bs off

-- | A fixed-width character field (not NUL-terminated).
getChars :: Int -> Int -> BS.ByteString -> BS.ByteString
getChars off len = getBytes off len

-- | A NUL-terminated string field. The result is copied so it does not
-- retain the whole 512-byte header block.
getString :: Int -> Int -> BS.ByteString -> BS.ByteString
getString off len = BS.copy . BS.Char8.takeWhile (/='\0') . getBytes off len

-- These days we'd just use Either, but in older versions of base there was no
-- Monad instance for Either, it was in mtl with an annoying Error constraint.
--
data Partial e a = Error e | Ok a

-- | Interpret a 'Partial' result as a standard 'Either'.
partial :: Partial e a -> Either e a
partial (Error msg) = Left msg
partial (Ok x)      = Right x

instance Functor (Partial e) where
    fmap = liftM

instance Applicative (Partial e) where
    pure  = Ok
    (<*>) = ap

instance Monad (Partial e) where
    return = pure
    Error m >>= _ = Error m
    Ok    x >>= k = k x
    -- NOTE: the former definition @fail = error "fail @(Partial e)"@ has been
    -- removed: 'fail' stopped being a 'Monad' method in base-4.13 (GHC 8.8),
    -- so defining it here is a compile error on modern GHC. No do-block in
    -- this module uses a failable pattern, so no 'MonadFail' instance is
    -- needed either.

-- | Parse an unsigned octal number from the ASCII digits of a header field.
-- Returns 'Nothing' for an empty field or on any non-octal-digit byte.
--
-- Fix: the previous upper bound of @0x39@ ('9') accepted the non-octal
-- digits '8' and '9' while still multiplying by 8, silently producing wrong
-- values (e.g. \"19\" parsed as 17); such fields are now rejected so the
-- caller reports 'HeaderBadNumericEncoding' instead.
{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int   #-}
{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int64 #-}
readOct :: Integral n => BS.ByteString -> Maybe n
readOct bs0 = case go 0 0 bs0 of
    -1 -> Nothing   -- sentinel is safe: octal digits never yield a negative
    n  -> Just n
  where
    go :: Integral n => Int -> n -> BS.ByteString -> n
    go !i !n !bs
      | BS.null bs = if i == 0 then -1 else n
      | otherwise  =
          case BS.unsafeHead bs of
            w | w >= 0x30
             && w <= 0x37 -> go (i+1)
                                (n * 8 + (fromIntegral w - 0x30))
                                (BS.unsafeTail bs)
              | otherwise -> -1
-------------------------------------------------------------------------------- /Codec/Archive/Tar.hs: --------------------------------------------------------------------------------
{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Codec.Archive.Tar
-- Copyright   :  (c) 2007 Bjorn Bringert,
--                    2008 Andrea Vezzosi,
--                    2008-2012 Duncan Coutts
-- License     :  BSD3
--
-- Maintainer  :  duncan@community.haskell.org
-- Portability :  portable
--
-- Reading, writing and manipulating \"@.tar@\" archive files.
--
-- This module uses common names and so is designed to be imported qualified:
--
-- > import qualified Codec.Archive.Tar as Tar
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar (

  -- | Tar archive files are used to store a collection of other files in a
  -- single file. They consist of a sequence of entries. Each entry describes
  -- a file or directory (or some other special kind of file). The entry stores
  -- a little bit of meta-data, in particular the file or directory name.
  --
  -- Unlike some other archive formats, a tar file contains no index. The
  -- information about each entry is stored next to the entry. Because of this,
  -- tar files are almost always processed linearly rather than in a
  -- random-access fashion.
  --
  -- The functions in this package are designed for working on tar files
  -- linearly and lazily. This makes it possible to do many operations in
  -- constant space rather than having to load the entire archive into memory.
  --
  -- It can read and write standard POSIX tar files and also the GNU and old
  -- Unix V7 tar formats. The convenience functions that are provided in the
  -- "Codec.Archive.Tar.Entry" module for creating archive entries are
  -- primarily designed for standard portable archives. If you need to
  -- construct GNU format archives or exactly preserve file ownership and
  -- permissions then you will need to write some extra helper functions.
  --
  -- This module contains just the simple high level operations without
  -- exposing all the details of tar files. If you need to inspect tar
  -- entries in more detail or construct them directly then you also need
  -- the module "Codec.Archive.Tar.Entry".

  -- * High level \"all in one\" operations
  create,
  extract,
  append,

  -- * Notes
  -- ** Compressed tar archives
  -- | Tar files are commonly used in conjunction with gzip compression, as in
  -- \"@.tar.gz@\" or \"@.tar.bz2@\" files. This module does not directly
  -- handle compressed tar files however they can be handled easily by
  -- composing functions from this module and the modules
  -- @Codec.Compression.GZip@ or @Codec.Compression.BZip@
  -- (see @zlib@ or @bzlib@ packages).
  --
  -- Creating a compressed \"@.tar.gz@\" file is just a minor variation on the
  -- 'create' function, but where we throw compression into the pipeline:
  --
  -- > BS.writeFile tar . GZip.compress . Tar.write =<< Tar.pack base dir
  --
  -- Similarly, extracting a compressed \"@.tar.gz@\" is just a minor variation
  -- on the 'extract' function where we use decompression in the pipeline:
  --
  -- > Tar.unpack dir . Tar.read . GZip.decompress =<< BS.readFile tar
  --

  -- ** Security
  -- | This is pretty important. A maliciously constructed tar archive could
  -- contain entries that specify bad file names. It could specify absolute
  -- file names like \"@\/etc\/passwd@\" or relative files outside of the
  -- archive like \"..\/..\/..\/something\". This security problem is commonly
  -- called a \"directory traversal vulnerability\". Historically, such
  -- vulnerabilities have been common in packages handling tar archives.
  --
  -- The 'extract' and 'unpack' functions check for bad file names. See the
  -- 'checkSecurity' function for more details. If you need to do any custom
  -- unpacking then you should use this.

  -- ** Tarbombs
  -- | A \"tarbomb\" is a @.tar@ file where not all entries are in a
  -- subdirectory but instead files extract into the top level directory. The
  -- 'extract' function does not check for these however if you want to do
  -- that you can use the 'checkTarbomb' function like so:
  --
  -- > Tar.unpack dir . Tar.checkTarbomb expectedDir
  -- >                . Tar.read =<< BS.readFile tar
  --
  -- In this case extraction will fail if any file is outside of @expectedDir@.

  -- * Converting between internal and external representation
  -- | Note, you cannot expect @write . read@ to give exactly the same output
  -- as input. You can expect the information to be preserved exactly however.
  -- This is because 'read' accepts common format variations while 'write'
  -- produces the standard format.
  read,
  write,

  -- * Packing and unpacking files to\/from internal representation
  -- | These functions are for packing and unpacking portable archives. They
  -- are not suitable in cases where it is important to preserve file ownership
  -- and permissions or to archive special files like named pipes and Unix
  -- device files.
  pack,
  unpack,

  -- * Types
  -- ** Tar entry type
  -- | This module provides only very simple and limited read-only access to
  -- the 'Entry' type. If you need access to the details or if you need to
  -- construct your own entries then also import "Codec.Archive.Tar.Entry".
  Entry,
  entryPath,
  entryContent,
  EntryContent(..),

  -- ** Sequences of tar entries
  Entries(..),
  mapEntries,
  mapEntriesNoFail,
  foldEntries,
  foldlEntries,
  unfoldEntries,

  -- * Error handling
  -- | Reading tar files can fail if the data does not match the tar file
  -- format correctly.
  --
  -- The style of error handling is by returning structured errors. The pure
  -- functions in the library do not throw exceptions, they return the errors
  -- as data. The IO actions in the library can throw exceptions, in particular
  -- the 'unpack' action does this. All the error types used are an instance of
  -- the standard 'Exception' class so it is possible to 'throw' and 'catch'
  -- them.

  -- ** Errors from reading tar files
  FormatError(..),

#ifdef TESTS
  prop_write_read_ustar,
  prop_write_read_gnu,
  prop_write_read_v7,
#endif
  ) where

import Codec.Archive.Tar.Types

import Codec.Archive.Tar.Read
import Codec.Archive.Tar.Write

import Codec.Archive.Tar.Pack
import Codec.Archive.Tar.Unpack
import Codec.Archive.Tar.Index (hSeekEndEntryOffset)

import Codec.Archive.Tar.Check

import Control.Exception (Exception, throw, catch)
import qualified Data.ByteString.Lazy as BS
import System.IO (withFile, IOMode(..))
import Prelude hiding (read)

-- | Create a new @\".tar\"@ file from a directory of files.
--
-- It is equivalent to calling the standard @tar@ program like so:
--
-- @$ tar -f tarball.tar -C base -c dir@
--
-- This assumes a directory @.\/base\/dir@ with files inside, eg
-- @.\/base\/dir\/foo.txt@. The file names inside the resulting tar file will be
-- relative to @dir@, eg @dir\/foo.txt@.
--
-- This is a high level \"all in one\" operation. Since you may need variations
-- on this function it is instructive to see how it is written. It is just:
--
-- > BS.writeFile tar . Tar.write =<< Tar.pack base paths
--
-- Notes:
--
-- The files and directories must not change during this operation or the
-- result is not well defined.
--
-- The intention of this function is to create tarballs that are portable
-- It is /not/ suitable for doing file system backups because
-- file ownership and permissions are not fully preserved. File ownership is
-- not preserved at all. File permissions are set to simple portable values:
--
-- * @rw-r--r--@ for normal files
--
-- * @rwxr-xr-x@ for executable files
--
-- * @rwxr-xr-x@ for directories
--
create :: FilePath   -- ^ Path of the \".tar\" file to write.
       -> FilePath   -- ^ Base directory
       -> [FilePath] -- ^ Files and directories to archive, relative to base dir
       -> IO ()
create tar base paths = BS.writeFile tar . write =<< pack base paths

-- | Extract all the files contained in a @\".tar\"@ file.
--
-- It is equivalent to calling the standard @tar@ program like so:
--
-- @$ tar -x -f tarball.tar -C dir@
--
-- So for example if the @tarball.tar@ file contains @foo\/bar.txt@ then this
-- will extract it to @dir\/foo\/bar.txt@.
--
-- This is a high level \"all in one\" operation. Since you may need variations
-- on this function it is instructive to see how it is written. It is just:
--
-- > Tar.unpack dir . Tar.read =<< BS.readFile tar
--
-- Notes:
--
-- Extracting can fail for a number of reasons. The tarball may be incorrectly
-- formatted. There may be IO or permission errors. In such cases an exception
-- will be thrown and extraction will not continue.
--
-- Since the extraction may fail part way through it is not atomic. For this
-- reason you may want to extract into an empty directory and, if the
-- extraction fails, recursively delete the directory.
--
-- Security: only files inside the target directory will be written. Tarballs
-- containing entries that point outside of the tarball (either absolute paths
-- or relative paths) will be caught and an exception will be thrown.
--
extract :: FilePath -- ^ Destination directory
        -> FilePath -- ^ Tarball
        -> IO ()
extract dir tar = unpack dir . read =<< BS.readFile tar

-- | Append new entries to a @\".tar\"@ file from a directory of files.
--
-- This is much like 'create', except that all the entries are added to the
-- end of an existing tar file. Or if the file does not already exist then
-- it behaves the same as 'create'.
--
append :: FilePath   -- ^ Path of the \".tar\" file to write.
       -> FilePath   -- ^ Base directory
       -> [FilePath] -- ^ Files and directories to archive, relative to base dir
       -> IO ()
append tar base paths =
    withFile tar ReadWriteMode $ \hnd -> do
      -- position the handle just before the end-of-archive trailer so the
      -- new entries splice onto the existing sequence
      _ <- hSeekEndEntryOffset hnd Nothing
      BS.hPut hnd . write =<< pack base paths

-------------------------
-- Correctness properties
--

#ifdef TESTS

prop_write_read_ustar :: [Entry] -> Bool
prop_write_read_ustar entries =
    foldr Next Done entries' == read (write entries')
  where
    entries' = [ e { entryFormat = UstarFormat } | e <- entries ]

prop_write_read_gnu :: [Entry] -> Bool
prop_write_read_gnu entries =
    foldr Next Done entries' == read (write entries')
  where
    entries' = [ e { entryFormat = GnuFormat } | e <- entries ]

prop_write_read_v7 :: [Entry] -> Bool
prop_write_read_v7 entries =
    foldr Next Done entries' == read (write entries')
  where
    entries' = [ limitToV7FormatCompat e { entryFormat = V7Format }
               | e <- entries ]

#endif
-------------------------------------------------------------------------------- /Codec/Archive/Tar/Index/StringTable.hs: --------------------------------------------------------------------------------
{-# LANGUAGE CPP, BangPatterns, PatternGuards, DeriveDataTypeable #-}

module Codec.Archive.Tar.Index.StringTable (
  StringTable,
  lookup,
  index,
  construct,

  StringTableBuilder,
  empty,
  insert,
  inserts,
  finalise,
  unfinalise,

  serialise,
  serialiseSize,
  deserialiseV1,
  deserialiseV2,

#ifdef TESTS
  prop_valid,
  prop_sorted,
  prop_finalise_unfinalise,
  prop_serialise_deserialise,
  prop_serialiseSize,
#endif
 ) where

import Data.Typeable (Typeable)

import Prelude   hiding (lookup, id)
import Data.List hiding (lookup, insert)
import Data.Function (on)
import Data.Word (Word32)
import Data.Int  (Int32)
import Data.Bits
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import Control.Exception (assert)

import qualified Data.Array.Unboxed as A
import Data.Array.Unboxed ((!))
#if MIN_VERSION_containers(0,5,0)
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
#else
import qualified Data.Map as Map
import Data.Map (Map)
#endif
import qualified Data.ByteString        as BS
import qualified Data.ByteString.Unsafe as BS
import qualified Data.ByteString.Lazy   as LBS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder          as BS
import Data.ByteString.Builder.Extra    as BS (byteStringCopy)
#else
import Data.ByteString.Lazy.Builder        as BS
import Data.ByteString.Lazy.Builder.Extras as BS (byteStringCopy)
#endif


-- | An efficient mapping from strings to a dense set of integers.
--
data StringTable id = StringTable
         {-# UNPACK #-} !BS.ByteString           -- all strings concatenated
         {-# UNPACK #-} !(A.UArray Int32 Word32) -- string offset table
         {-# UNPACK #-} !(A.UArray Int32 Int32)  -- string index to id table
         {-# UNPACK #-} !(A.UArray Int32 Int32)  -- string id to index table
  deriving (Show, Typeable)

instance (Eq id, Enum id) => Eq (StringTable id) where
  -- compare via the builder form; the array representation is an encoding
  -- detail
  tbl1 == tbl2 = unfinalise tbl1 == unfinalise tbl2

-- | Look up a string in the token table. If the string is present, return
-- its corresponding index.
--
lookup :: Enum id => StringTable id -> BS.ByteString -> Maybe id
lookup (StringTable bs offsets ids _ixs) str =
    binarySearch 0 (topBound-1) str
  where
    (0, topBound) = A.bounds offsets

    -- binary search works because the stored strings are sorted,
    -- see prop_sorted
    binarySearch !a !b !key
      | a > b     = Nothing
      | otherwise = case compare key (index' bs offsets mid) of
          LT -> binarySearch a (mid-1) key
          EQ -> Just $! toEnum (fromIntegral (ids ! mid))
          GT -> binarySearch (mid+1) b key
      where mid = (a + b) `div` 2

-- | Slice the string at a given position out of the concatenated buffer,
-- using the offset table to find its bounds.
index' :: BS.ByteString -> A.UArray Int32 Word32 -> Int32 -> BS.ByteString
index' bs offsets i = BS.unsafeTake len . BS.unsafeDrop start $ bs
  where
    start, end, len :: Int
    start = fromIntegral (offsets ! i)
    end   = fromIntegral (offsets ! (i+1))
    len   = end - start


-- | Given the index of a string in the table, return the string.
--
index :: Enum id => StringTable id -> id -> BS.ByteString
index (StringTable bs offsets _ids ixs) =
    index' bs offsets . (ixs !) . fromIntegral . fromEnum


-- | Given a list of strings, construct a 'StringTable' mapping those strings
-- to a dense set of integers. Also return the ids for all the strings used
-- in the construction.
--
construct :: Enum id => [BS.ByteString] -> StringTable id
construct = finalise . foldl' (\tbl s -> fst (insert s tbl)) empty


data StringTableBuilder id = StringTableBuilder
                              !(Map BS.ByteString id)
                              {-# UNPACK #-} !Word32   -- next unused id
  deriving (Eq, Show, Typeable)

empty :: StringTableBuilder id
empty = StringTableBuilder Map.empty 0

-- | Insert a string and return its id. Inserting a string that is already
-- present returns the existing id and leaves the builder unchanged.
insert :: Enum id => BS.ByteString -> StringTableBuilder id -> (StringTableBuilder id, id)
insert str builder@(StringTableBuilder smap nextid) =
    case Map.lookup str smap of
      Just id -> (builder, id)
      Nothing -> let !id    = toEnum (fromIntegral nextid)
                     !smap' = Map.insert str id smap
                  in (StringTableBuilder smap' (nextid+1), id)

inserts :: Enum id => [BS.ByteString] -> StringTableBuilder id -> (StringTableBuilder id, [id])
inserts bss builder = mapAccumL (flip insert) builder bss

-- | Freeze a builder into the compact array representation.
finalise :: Enum id => StringTableBuilder id -> StringTable id
finalise (StringTableBuilder smap _) =
    (StringTable strs offsets ids ixs)
  where
    strs    = BS.concat (Map.keys smap)
    -- offsets has one more element than there are strings: it also carries
    -- the end offset of the final string
    offsets = A.listArray (0, fromIntegral (Map.size smap))
            . scanl (\off str -> off + fromIntegral (BS.length str)) 0
            $ Map.keys smap
    ids     = A.listArray (0, fromIntegral (Map.size smap) - 1)
            . map (fromIntegral . fromEnum)
            $ Map.elems smap
    ixs     = A.array (A.bounds ids) [ (id,ix) | (ix,id) <- A.assocs ids ]

-- | The inverse of 'finalise': recover a builder so that further strings
-- can be inserted into an existing table.
unfinalise :: Enum id => StringTable id -> StringTableBuilder id
unfinalise (StringTable strs offsets ids _) =
    StringTableBuilder smap nextid
  where
    -- fromAscList is valid because the stored strings are sorted
    smap   = Map.fromAscList
               [ (index' strs offsets ix, toEnum (fromIntegral (ids ! ix)))
               | ix <- [0..h] ]
    (0,h)  = A.bounds ids
    nextid = fromIntegral (h+1)


-------------------------
-- (de)serialisation
--

serialise :: StringTable id -> BS.Builder
serialise (StringTable strs offs ids ixs) =
    let (_, !ixEnd) = A.bounds offs in

    BS.word32BE (fromIntegral (BS.length strs))
 <> BS.word32BE (fromIntegral ixEnd + 1)
 <> BS.byteStringCopy strs
 <> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems offs)
 <> foldr (\n r -> BS.int32BE  n <> r) mempty (A.elems ids)
 <> foldr (\n r -> BS.int32BE  n <> r) mempty (A.elems ixs)

-- | The exact number of bytes that 'serialise' will produce,
-- see prop_serialiseSize.
serialiseSize :: StringTable id -> Int
serialiseSize (StringTable strs offs _ids _ixs) =
    let (_, !ixEnd) = A.bounds offs
     in 4 * 2
      + BS.length strs
      + 4 * (fromIntegral ixEnd + 1)
      + 8 *  fromIntegral ixEnd

-- | Decode the old V1 format: a single offsets array with identity
-- index\/id mappings.
deserialiseV1 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
deserialiseV1 bs
  | BS.length bs >= 8
  , let lenStrs = fromIntegral (readWord32BE bs 0)
        lenArr  = fromIntegral (readWord32BE bs 4)
        lenTotal= 8 + lenStrs + 4 * lenArr
  , BS.length bs >= lenTotal
  , let strs = BS.take lenStrs (BS.drop 8 bs)
        arr  = A.array (0, fromIntegral lenArr - 1)
                       [ (i, readWord32BE bs off)
                       | (i, off) <- zip [0 .. fromIntegral lenArr - 1]
                                         [offArrS,offArrS+4 .. offArrE]
                       ]
        ids  = A.array (0, fromIntegral lenArr - 1)
                       [ (i,i) | i <- [0 .. fromIntegral lenArr - 1] ]
        ixs  = ids -- two identity mappings
        offArrS = 8 + lenStrs
        offArrE = offArrS + 4 * lenArr - 1
        !stringTable = StringTable strs arr ids ixs
        !bs'         = BS.drop lenTotal bs
  = Just (stringTable, bs')

  | otherwise
  = Nothing

-- | Decode the V2 format, which carries all three arrays explicitly.
deserialiseV2 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
deserialiseV2 bs
  | BS.length bs >= 8
  , let lenStrs = fromIntegral (readWord32BE bs 0)
        lenArr  = fromIntegral (readWord32BE bs 4)
        lenTotal= 8                     -- the two length prefixes
                + lenStrs
                + 4 * lenArr
                +(4 * (lenArr - 1)) * 2 -- offsets array is 1 longer
  , BS.length bs >= lenTotal
  , let strs = BS.take lenStrs (BS.drop 8 bs)
        offs = A.listArray (0, fromIntegral lenArr - 1)
                 [ readWord32BE bs off
                 | off <- offsets offsOff ]
        -- the second two arrays are 1 shorter
        ids  = A.listArray (0, fromIntegral lenArr - 2)
                 [ readInt32BE bs off
                 | off <- offsets idsOff ]
        ixs  = A.listArray (0, fromIntegral lenArr - 2)
                 [ readInt32BE bs off
                 | off <- offsets ixsOff ]
        offsOff = 8 + lenStrs
        idsOff  = offsOff + 4 * lenArr
        ixsOff  = idsOff  + 4 * (lenArr-1)
        offsets from = [from,from+4 .. from + 4 * (lenArr - 1)]
        !stringTable = StringTable strs offs ids ixs
        !bs'         = BS.drop lenTotal bs
  = Just (stringTable, bs')

  | otherwise
  = Nothing

readInt32BE :: BS.ByteString -> Int -> Int32
readInt32BE bs i = fromIntegral (readWord32BE bs i)

-- | Read a big-endian 'Word32' at the given byte offset.
readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE bs i =
    assert (i >= 0 && i+3 <= BS.length bs - 1) $
    fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
  + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
  + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
  + fromIntegral (BS.unsafeIndex bs (i + 3))

#ifdef TESTS

prop_valid :: [BS.ByteString] -> Bool
prop_valid strs =
     all lookupIndex (enumStrings tbl)
  && all indexLookup (enumIds tbl)

  where
    tbl :: StringTable Int
    tbl = construct strs

    lookupIndex str = index tbl ident == str
      where Just ident = lookup tbl str

    indexLookup ident = lookup tbl str == Just ident
      where str = index tbl ident

-- this is important so we can use Map.fromAscList
prop_sorted :: [BS.ByteString] -> Bool
prop_sorted strings =
    isSorted [ index' strs offsets ix
             | ix <- A.range (A.bounds ids) ]
  where
    _tbl :: StringTable Int
    _tbl@(StringTable strs offsets ids _ixs) = construct strings
    isSorted xs = and (zipWith (<) xs (tail xs))

prop_finalise_unfinalise :: [BS.ByteString] -> Bool
prop_finalise_unfinalise strs =
    builder == unfinalise (finalise builder)
  where
    builder :: StringTableBuilder Int
    builder = foldl' (\tbl s -> fst (insert s tbl)) empty strs

prop_serialise_deserialise :: [BS.ByteString] -> Bool
prop_serialise_deserialise strs =
    Just (strtable, BS.empty) == (deserialiseV2
                                  . toStrict . BS.toLazyByteString
                                  . serialise) strtable
  where
    strtable :: StringTable Int
    strtable = construct strs

prop_serialiseSize :: [BS.ByteString] -> Bool
prop_serialiseSize strs =
    (fromIntegral . LBS.length . BS.toLazyByteString . serialise) strtable
 == serialiseSize strtable
  where
    strtable :: StringTable Int
    strtable = construct strs

enumStrings :: Enum id => StringTable id -> [BS.ByteString]
enumStrings (StringTable bs offsets _ _) = map (index' bs offsets) [0..h-1]
  where (0,h) = A.bounds offsets

enumIds :: Enum id => StringTable id -> [id]
enumIds (StringTable _ offsets _ _) = [toEnum 0 .. toEnum (fromIntegral (h-1))]
  where (0,h) = A.bounds offsets

toStrict :: LBS.ByteString -> BS.ByteString
#if MIN_VERSION_bytestring(0,10,0)
toStrict = LBS.toStrict
#else
toStrict = BS.concat . LBS.toChunks
#endif

#endif

#if !(MIN_VERSION_base(4,5,0))
(<>) :: Monoid m => m -> m -> m
(<>) = mappend
#endif
-------------------------------------------------------------------------------- /Codec/Archive/Tar/Index/IntTrie.hs: --------------------------------------------------------------------------------
{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
{-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-}

module Codec.Archive.Tar.Index.IntTrie (

  IntTrie,
  construct,
  toList,

  IntTrieBuilder,
  empty,
  insert,
  finalise,
  unfinalise,

  lookup,
  TrieLookup(..),

  serialise,
  serialiseSize,
  deserialise,

#ifdef TESTS
  test1, test2, test3,
  ValidPaths(..),
  prop_lookup,
  prop_completions,
  prop_lookup_mono,
  prop_completions_mono,
  prop_construct_toList,
  prop_finalise_unfinalise,
  prop_serialise_deserialise,
  prop_serialiseSize,
#endif
 ) where

import Prelude hiding (lookup)

import Data.Typeable (Typeable)

import qualified Data.Array.Unboxed as A
import Data.Array.IArray  ((!))
import qualified Data.Bits as Bits
import Data.Word (Word32)
import Data.Bits
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import qualified Data.ByteString        as BS
import qualified Data.ByteString.Lazy   as LBS
import qualified Data.ByteString.Unsafe as BS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder      as BS
#else
import Data.ByteString.Lazy.Builder as BS
#endif
import Control.Exception (assert)
#if MIN_VERSION_containers(0,5,0)
import qualified Data.Map.Strict    as Map
import qualified Data.IntMap.Strict as IntMap
import Data.IntMap.Strict (IntMap)
#else
import qualified Data.Map    as Map
import qualified Data.IntMap as IntMap
import Data.IntMap (IntMap)
#endif

import Data.List hiding (lookup, insert)
import Data.Function (on)

#ifdef TESTS
import Test.QuickCheck
import Control.Applicative ((<$>), (<*>))
#endif


-- | A compact mapping from sequences of nats to nats.
--
-- NOTE: The tries in this module have values /only/ at the leaves (which
-- correspond to files), they do not have values at the branch points (which
-- correspond to directories).
newtype IntTrie k v = IntTrie (A.UArray Word32 Word32)
  deriving (Eq, Show, Typeable)


-- Compact, read-only implementation of a trie. It's intended for use with file
-- paths, but we do that via string ids.

#ifdef TESTS
-- Example mapping:
--
example0 :: [(FilePath, Int)]
example0 =
  [("foo-1.0/foo-1.0.cabal", 512)   -- tar block 1
  ,("foo-1.0/LICENSE",       2048)  -- tar block 4
  ,("foo-1.0/Data/Foo.hs",   4096)] -- tar block 8

-- After converting path components to integers this becomes:
--
example1 :: [([Word32], Word32)]
example1 =
  [([1,2],   512)
  ,([1,3],   2048)
  ,([1,4,5], 4096)]

-- As a trie this looks like:

--  [ (1, *) ]
--        |
--  [ (2, 512), (3, 1024), (4, *) ]
--                              |
--                        [ (5, 4096) ]

-- We use an intermediate trie representation

mktrie :: [(Int, TrieNode k v)] -> IntTrieBuilder k v
mkleaf :: (Enum k, Enum v) => k -> v -> (Int, TrieNode k v)
mknode :: Enum k => k -> IntTrieBuilder k v -> (Int, TrieNode k v)

mktrie = IntTrieBuilder . IntMap.fromList
mkleaf k v = (fromEnum k, TrieLeaf (enumToWord32 v))
mknode k t = (fromEnum k, TrieNode t)

example2 :: IntTrieBuilder Word32 Word32
example2 = mktrie [ mknode 1 t1 ]
  where
    t1 = mktrie [ mkleaf 2 512, mkleaf 3 2048, mknode 4 t2 ]
    t2 = mktrie [ mkleaf 5 4096 ]


example2' :: IntTrieBuilder Word32 Word32
example2' = mktrie [ mknode 0 t1 ]
  where
    t1 = mktrie [ mknode 3 t2 ]
    t2 = mktrie [ mknode 1 t3, mknode 2 t4 ]
    t3 = mktrie [ mkleaf 4 10608 ]
    t4 = mktrie [ mkleaf 4 10612 ]
{-
0: [1,N0,3]

3: [1,N3,6]

6: [2,N1,N2,11,12]

11: [1,4,10608]
14: [1,4,10612]
-}

example2'' :: IntTrieBuilder Word32 Word32
example2'' = mktrie [ mknode 1 t1, mknode 2 t2 ]
  where
    t1 = mktrie [ mkleaf 4 10608 ]
    t2 = mktrie [ mkleaf 4 10612 ]

example2''' :: IntTrieBuilder Word32 Word32
example2''' = mktrie [ mknode 0 t3 ]
  where
    t3  = mktrie [ mknode 4 t8, mknode 6 t11 ]
    t8  = mktrie [ mknode 1 t14 ]
    t11 = mktrie [ mkleaf 5 10605 ]
    t14 = mktrie [ mknode 2 t19, mknode 3 t22 ]
    t19 = mktrie [ mkleaf 7 10608 ]
    t22 = mktrie [ mkleaf 7 10612 ]
{-
 0: [1,N0,3]
 3: [2,N4,N6,8,11]
 8: [1,N1,11]
11: [1,5,10605]
14: [2,N2,N3,16,19]
19: [1,7,10608]
22: [1,7,10612]
-}

-- We convert from the 'Paths' to the 'IntTrieBuilder' using 'inserts':
--
test1 = example2 == inserts example1 empty
#endif

-- Each node has a size and a sequence of keys followed by an equal length
-- sequence of corresponding entries. Since we're going to flatten this into
-- a single array then we will need to replace the trie structure with pointers
-- represented as array offsets.

-- Each node is a pair of arrays, one of keys and one of Either value pointer.
-- We need to distinguish values from internal pointers. We use a tag bit:
--
tagLeaf, tagNode, untag :: Word32 -> Word32
tagLeaf = id
tagNode = flip Bits.setBit   31
untag   = flip Bits.clearBit 31

isNode :: Word32 -> Bool
isNode = flip Bits.testBit 31

-- So the overall array form of the above trie is:
--
-- offset:  0   1    2    3    4    5    6     7    8     9    10  11   12
-- array: [ 1 | N1 | 3 ][ 3 | 2,   3,   N4 | 512, 2048, 10 ][ 1 | 5 | 4096 ]
--                  \__/                 \___/

#ifdef TESTS
example3 :: [Word32]
example3 =
 [1, tagNode 1,
  3,
  3, tagLeaf 2, tagLeaf 3, tagNode 4,
  512, 2048, 10,
  1, tagLeaf 5,
  4096
 ]

-- We get the array form by using flattenTrie:

test2 = example3 == flattenTrie example2

example4 :: IntTrie Int Int
example4 = IntTrie (mkArray example3)

mkArray :: [Word32] -> A.UArray Word32 Word32
mkArray xs = A.listArray (0, fromIntegral (length xs) - 1) xs

test3 = case lookup example4 [1] of
  Just (Completions [(2,_),(3,_),(4,_)]) -> True
  _                                      -> False

test1, test2, test3 :: Bool
#endif

-------------------------------------
-- Decoding the trie array form
--

-- | Enumerate the immediate children of the trie node that lives at the
-- given array offset: node entries recurse, leaf entries carry a value.
completionsFrom :: (Enum k, Enum v) => IntTrie k v -> Word32 -> Completions k v
completionsFrom trie@(IntTrie arr) nodeOff =
    [ (word32ToEnum (untag key), next)
    | keyOff <- [keysStart..keysEnd]
    , let key   = arr ! keyOff
          -- the entry for a key sits nodeSize slots after the key itself
          entry = arr ! (keyOff + nodeSize)
          next | isNode key = Completions (completionsFrom trie entry)
               | otherwise  = Entry (word32ToEnum entry)
    ]
  where
    nodeSize  = arr ! nodeOff
    keysStart = nodeOff + 1
    keysEnd   = nodeOff + nodeSize

-- | Convert the trie to a list
--
-- This is the left inverse to 'construct' (modulo ordering).
toList :: forall k v. (Enum k, Enum v) => IntTrie k v -> [([k], v)]
toList = concatMap (aux []) . (`completionsFrom` 0)
  where
    aux :: [k] -> (k, TrieLookup k v) -> [([k], v)]
    aux ks (k, Entry v)        = [(reverse (k:ks), v)]
    aux ks (k, Completions cs) = concatMap (aux (k:ks)) cs

-------------------------------------
-- Toplevel trie array construction
--

-- So constructing the 'IntTrie' as a whole is just a matter of stringing
-- together all the bits

-- | Build an 'IntTrie' from a bunch of (key, value) pairs, where the keys
-- are sequences.
--
construct :: (Enum k, Enum v) => [([k], v)] -> IntTrie k v
construct = finalise . flip inserts empty


---------------------------------
-- Looking up in the trie array
--

data TrieLookup  k v = Entry !v | Completions (Completions k v) deriving Show
type Completions k v = [(k, TrieLookup k v)]

lookup :: forall k v.
(Enum k, Enum v) => IntTrie k v -> [k] -> Maybe (TrieLookup k v) 280 | lookup trie@(IntTrie arr) = go 0 281 | where 282 | go :: Word32 -> [k] -> Maybe (TrieLookup k v) 283 | go nodeOff [] = Just (completions nodeOff) 284 | go nodeOff (k:ks) = case search nodeOff (tagLeaf k') of 285 | Just entryOff 286 | | null ks -> Just (entry entryOff) 287 | | otherwise -> Nothing 288 | Nothing -> case search nodeOff (tagNode k') of 289 | Nothing -> Nothing 290 | Just entryOff -> go (arr ! entryOff) ks 291 | where 292 | k' = enumToWord32 k 293 | 294 | entry entryOff = Entry (word32ToEnum (arr ! entryOff)) 295 | completions nodeOff = Completions (completionsFrom trie nodeOff) 296 | 297 | search :: Word32 -> Word32 -> Maybe Word32 298 | search nodeOff key = fmap (+nodeSize) (bsearch keysStart keysEnd key) 299 | where 300 | nodeSize = arr ! nodeOff 301 | keysStart = nodeOff + 1 302 | keysEnd = nodeOff + nodeSize 303 | 304 | bsearch :: Word32 -> Word32 -> Word32 -> Maybe Word32 305 | bsearch a b key 306 | | a > b = Nothing 307 | | otherwise = case compare key (arr ! mid) of 308 | LT -> bsearch a (mid-1) key 309 | EQ -> Just mid 310 | GT -> bsearch (mid+1) b key 311 | where mid = (a + b) `div` 2 312 | 313 | 314 | enumToWord32 :: Enum n => n -> Word32 315 | enumToWord32 = fromIntegral . fromEnum 316 | 317 | word32ToEnum :: Enum n => Word32 -> n 318 | word32ToEnum = toEnum . 
fromIntegral 319 | 320 | 321 | ------------------------- 322 | -- Building Tries 323 | -- 324 | 325 | newtype IntTrieBuilder k v = IntTrieBuilder (IntMap (TrieNode k v)) 326 | deriving (Show, Eq) 327 | 328 | data TrieNode k v = TrieLeaf {-# UNPACK #-} !Word32 329 | | TrieNode !(IntTrieBuilder k v) 330 | deriving (Show, Eq) 331 | 332 | empty :: IntTrieBuilder k v 333 | empty = IntTrieBuilder IntMap.empty 334 | 335 | insert :: (Enum k, Enum v) => [k] -> v 336 | -> IntTrieBuilder k v -> IntTrieBuilder k v 337 | insert [] _v t = t 338 | insert (k:ks) v t = insertTrie (fromEnum k) (map fromEnum ks) (enumToWord32 v) t 339 | 340 | insertTrie :: Int -> [Int] -> Word32 341 | -> IntTrieBuilder k v -> IntTrieBuilder k v 342 | insertTrie k ks v (IntTrieBuilder t) = 343 | IntTrieBuilder $ 344 | IntMap.alter (\t' -> Just $! maybe (freshTrieNode ks v) 345 | (insertTrieNode ks v) t') 346 | k t 347 | 348 | insertTrieNode :: [Int] -> Word32 -> TrieNode k v -> TrieNode k v 349 | insertTrieNode [] v _ = TrieLeaf v 350 | insertTrieNode (k:ks) v (TrieLeaf _) = TrieNode (freshTrie k ks v) 351 | insertTrieNode (k:ks) v (TrieNode t) = TrieNode (insertTrie k ks v t) 352 | 353 | freshTrie :: Int -> [Int] -> Word32 -> IntTrieBuilder k v 354 | freshTrie k [] v = 355 | IntTrieBuilder (IntMap.singleton k (TrieLeaf v)) 356 | freshTrie k (k':ks) v = 357 | IntTrieBuilder (IntMap.singleton k (TrieNode (freshTrie k' ks v))) 358 | 359 | freshTrieNode :: [Int] -> Word32 -> TrieNode k v 360 | freshTrieNode [] v = TrieLeaf v 361 | freshTrieNode (k:ks) v = TrieNode (freshTrie k ks v) 362 | 363 | inserts :: (Enum k, Enum v) => [([k], v)] 364 | -> IntTrieBuilder k v -> IntTrieBuilder k v 365 | inserts kvs t = foldl' (\t' (ks, v) -> insert ks v t') t kvs 366 | 367 | finalise :: IntTrieBuilder k v -> IntTrie k v 368 | finalise trie = 369 | IntTrie $ 370 | A.listArray (0, fromIntegral (flatTrieLength trie) - 1) 371 | (flattenTrie trie) 372 | 373 | unfinalise :: (Enum k, Enum v) => IntTrie k v -> IntTrieBuilder 
k v 374 | unfinalise trie = 375 | go (completionsFrom trie 0) 376 | where 377 | go kns = 378 | IntTrieBuilder $ 379 | IntMap.fromList 380 | [ (fromEnum k, t) 381 | | (k, n) <- kns 382 | , let t = case n of 383 | Entry v -> TrieLeaf (enumToWord32 v) 384 | Completions kns' -> TrieNode (go kns') 385 | ] 386 | 387 | --------------------------------- 388 | -- Flattening Tries 389 | -- 390 | 391 | type Offset = Int 392 | 393 | flatTrieLength :: IntTrieBuilder k v -> Int 394 | flatTrieLength (IntTrieBuilder tns) = 395 | 1 396 | + 2 * IntMap.size tns 397 | + sum [ flatTrieLength n | TrieNode n <- IntMap.elems tns ] 398 | 399 | -- This is a breadth-first traversal. We keep a list of the tries that we are 400 | -- to write out next. Each of these have an offset allocated to them at the 401 | -- time we put them into the list. We keep a running offset so we know where 402 | -- to allocate next. 403 | -- 404 | flattenTrie :: IntTrieBuilder k v -> [Word32] 405 | flattenTrie trie = go (queue [trie]) (size trie) 406 | where 407 | size (IntTrieBuilder tns) = 1 + 2 * IntMap.size tns 408 | 409 | go :: Q (IntTrieBuilder k v) -> Offset -> [Word32] 410 | go todo !offset = 411 | case dequeue todo of 412 | Nothing -> [] 413 | Just (IntTrieBuilder tnodes, tries) -> 414 | flat ++ go tries' offset' 415 | where 416 | !count = IntMap.size tnodes 417 | flat = fromIntegral count 418 | : Map.keys keysValues 419 | ++ Map.elems keysValues 420 | (!offset', !keysValues, !tries') = 421 | #if MIN_VERSION_containers(0,4,2) 422 | IntMap.foldlWithKey' accumNodes 423 | (offset, Map.empty, tries) 424 | tnodes 425 | #else 426 | foldl' (\a (k,v) -> accumNodes a k v) 427 | (offset, Map.empty, tries) 428 | (IntMap.toList tnodes) 429 | #endif 430 | 431 | accumNodes :: (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v)) 432 | -> Int -> TrieNode k v 433 | -> (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v)) 434 | accumNodes (!off, !kvs, !tries) !k (TrieLeaf v) = 435 | (off, kvs', tries) 436 | where 437 
| kvs' = Map.insert (tagLeaf (int2Word32 k)) v kvs 438 | 439 | accumNodes (!off, !kvs, !tries) !k (TrieNode t) = 440 | (off + size t, kvs', tries') 441 | where 442 | kvs' = Map.insert (tagNode (int2Word32 k)) (int2Word32 off) kvs 443 | tries' = enqueue tries t 444 | 445 | data Q a = Q [a] [a] 446 | 447 | queue :: [a] -> Q a 448 | queue xs = Q xs [] 449 | 450 | enqueue :: Q a -> a -> Q a 451 | enqueue (Q front back) x = Q front (x : back) 452 | 453 | dequeue :: Q a -> Maybe (a, Q a) 454 | dequeue (Q (x:xs) back) = Just (x, Q xs back) 455 | dequeue (Q [] back) = case reverse back of 456 | x:xs -> Just (x, Q xs []) 457 | [] -> Nothing 458 | 459 | int2Word32 :: Int -> Word32 460 | int2Word32 = fromIntegral 461 | 462 | 463 | ------------------------- 464 | -- (de)serialisation 465 | -- 466 | 467 | serialise :: IntTrie k v -> BS.Builder 468 | serialise (IntTrie arr) = 469 | let (_, !ixEnd) = A.bounds arr in 470 | BS.word32BE (ixEnd+1) 471 | <> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems arr) 472 | 473 | serialiseSize :: IntTrie k v -> Int 474 | serialiseSize (IntTrie arr) = 475 | let (_, ixEnd) = A.bounds arr in 476 | 4 477 | + 4 * (fromIntegral ixEnd + 1) 478 | 479 | deserialise :: BS.ByteString -> Maybe (IntTrie k v, BS.ByteString) 480 | deserialise bs 481 | | BS.length bs >= 4 482 | , let lenArr = readWord32BE bs 0 483 | lenTotal = 4 + 4 * fromIntegral lenArr 484 | , BS.length bs >= 4 + 4 * fromIntegral lenArr 485 | , let !arr = A.array (0, lenArr-1) 486 | [ (i, readWord32BE bs off) 487 | | (i, off) <- zip [0..lenArr-1] [4,8 .. 
lenTotal - 4] ] 488 | !bs' = BS.drop lenTotal bs 489 | = Just (IntTrie arr, bs') 490 | 491 | | otherwise 492 | = Nothing 493 | 494 | readWord32BE :: BS.ByteString -> Int -> Word32 495 | readWord32BE bs i = 496 | assert (i >= 0 && i+3 <= BS.length bs - 1) $ 497 | fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24 498 | + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16 499 | + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8 500 | + fromIntegral (BS.unsafeIndex bs (i + 3)) 501 | 502 | 503 | ------------------------- 504 | -- Correctness property 505 | -- 506 | 507 | #ifdef TESTS 508 | 509 | prop_lookup :: (Ord k, Enum k, Eq v, Enum v, Show k, Show v) 510 | => [([k], v)] -> Bool 511 | prop_lookup paths = 512 | flip all paths $ \(key, value) -> 513 | case lookup trie key of 514 | Just (Entry value') | value' == value -> True 515 | Just (Entry value') -> error $ "IntTrie: " ++ show (key, value, value') 516 | Nothing -> error $ "IntTrie: didn't find " ++ show key 517 | Just (Completions xs) -> error $ "IntTrie: " ++ show xs 518 | 519 | where 520 | trie = construct paths 521 | 522 | prop_completions :: forall k v. 
(Ord k, Enum k, Eq v, Enum v) => [([k], v)] -> Bool 523 | prop_completions paths = 524 | inserts paths empty 525 | == convertCompletions (completionsFrom (construct paths) 0) 526 | where 527 | convertCompletions :: Ord k => Completions k v -> IntTrieBuilder k v 528 | convertCompletions kls = 529 | IntTrieBuilder $ 530 | IntMap.fromList 531 | [ case l of 532 | Entry v -> mkleaf k v 533 | Completions kls' -> mknode k (convertCompletions kls') 534 | | (k, l) <- sortBy (compare `on` fst) kls ] 535 | 536 | 537 | prop_lookup_mono :: ValidPaths -> Bool 538 | prop_lookup_mono (ValidPaths paths) = prop_lookup paths 539 | 540 | prop_completions_mono :: ValidPaths -> Bool 541 | prop_completions_mono (ValidPaths paths) = prop_completions paths 542 | 543 | prop_construct_toList :: ValidPaths -> Bool 544 | prop_construct_toList (ValidPaths paths) = 545 | sortBy (compare `on` fst) (toList (construct paths)) 546 | == sortBy (compare `on` fst) paths 547 | 548 | prop_finalise_unfinalise :: ValidPaths -> Bool 549 | prop_finalise_unfinalise (ValidPaths paths) = 550 | builder == unfinalise (finalise builder) 551 | where 552 | builder :: IntTrieBuilder Char Char 553 | builder = inserts paths empty 554 | 555 | prop_serialise_deserialise :: ValidPaths -> Bool 556 | prop_serialise_deserialise (ValidPaths paths) = 557 | Just (trie, BS.empty) == (deserialise 558 | . toStrict . BS.toLazyByteString 559 | . serialise) trie 560 | where 561 | trie :: IntTrie Char Char 562 | trie = construct paths 563 | 564 | prop_serialiseSize :: ValidPaths -> Bool 565 | prop_serialiseSize (ValidPaths paths) = 566 | (fromIntegral . LBS.length . BS.toLazyByteString . serialise) trie 567 | == serialiseSize trie 568 | where 569 | trie :: IntTrie Char Char 570 | trie = construct paths 571 | 572 | newtype ValidPaths = ValidPaths [([Char], Char)] deriving Show 573 | 574 | instance Arbitrary ValidPaths where 575 | arbitrary = 576 | ValidPaths . 
makeNoPrefix <$> listOf ((,) <$> listOf1 arbitrary <*> arbitrary) 577 | where 578 | makeNoPrefix [] = [] 579 | makeNoPrefix ((k,v):kvs) 580 | | all (\(k', _) -> not (isPrefixOfOther k k')) kvs 581 | = (k,v) : makeNoPrefix kvs 582 | | otherwise = makeNoPrefix kvs 583 | 584 | shrink (ValidPaths kvs) = 585 | map ValidPaths . filter noPrefix . filter nonEmpty . shrink $ kvs 586 | where 587 | noPrefix [] = True 588 | noPrefix ((k,_):kvs') = all (\(k', _) -> not (isPrefixOfOther k k')) kvs' 589 | && noPrefix kvs' 590 | nonEmpty = all (not . null . fst) 591 | 592 | isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a 593 | 594 | toStrict :: LBS.ByteString -> BS.ByteString 595 | #if MIN_VERSION_bytestring(0,10,0) 596 | toStrict = LBS.toStrict 597 | #else 598 | toStrict = BS.concat . LBS.toChunks 599 | #endif 600 | 601 | #endif 602 | 603 | #if !(MIN_VERSION_base(4,5,0)) 604 | (<>) :: Monoid m => m -> m -> m 605 | (<>) = mappend 606 | #endif 607 | 608 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, GeneralizedNewtypeDeriving, BangPatterns #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar.Types 5 | -- Copyright : (c) 2007 Bjorn Bringert, 6 | -- 2008 Andrea Vezzosi, 7 | -- 2008-2009 Duncan Coutts 8 | -- 2011 Max Bolingbroke 9 | -- License : BSD3 10 | -- 11 | -- Maintainer : duncan@community.haskell.org 12 | -- Portability : portable 13 | -- 14 | -- Types to represent the content of @.tar@ archives. 
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar.Types (

  Entry(..),
  entryPath,
  EntryContent(..),
  FileSize,
  Permissions,
  Ownership(..),
  EpochTime,
  TypeCode,
  DevMajor,
  DevMinor,
  Format(..),

  simpleEntry,
  fileEntry,
  directoryEntry,

  ordinaryFilePermissions,
  executableFilePermissions,
  directoryPermissions,

  TarPath(..),
  toTarPath,
  fromTarPath,
  fromTarPathToPosixPath,
  fromTarPathToWindowsPath,

  LinkTarget(..),
  toLinkTarget,
  fromLinkTarget,
  fromLinkTargetToPosixPath,
  fromLinkTargetToWindowsPath,

  Entries(..),
  mapEntries,
  mapEntriesNoFail,
  foldEntries,
  foldlEntries,
  unfoldEntries,

#ifdef TESTS
  limitToV7FormatCompat
#endif
  ) where

import Data.Int      (Int64)
import Data.Monoid   (Monoid(..))
import qualified Data.ByteString       as BS
import qualified Data.ByteString.Char8 as BS.Char8
import qualified Data.ByteString.Lazy  as LBS
import Control.DeepSeq

import qualified System.FilePath as FilePath.Native
         ( joinPath, splitDirectories, addTrailingPathSeparator )
import qualified System.FilePath.Posix as FilePath.Posix
         ( joinPath, splitPath, splitDirectories, hasTrailingPathSeparator
         , addTrailingPathSeparator )
import qualified System.FilePath.Windows as FilePath.Windows
         ( joinPath, addTrailingPathSeparator )
import System.Posix.Types
         ( FileMode )

#ifdef TESTS
import Test.QuickCheck
import Control.Applicative ((<$>), pure, (<*>))
#endif


-- | File size in bytes.
type FileSize  = Int64
-- | The number of seconds since the UNIX epoch
type EpochTime = Int64
-- | Major number of a character/block device.
type DevMajor  = Int
-- | Minor number of a character/block device.
type DevMinor  = Int
-- | The type-code character stored in a tar header for non-standard entries.
type TypeCode  = Char
-- | Unix-style file mode bits.
type Permissions = FileMode

-- | Tar archive entry.
--
data Entry = Entry {

    -- | The path of the file or directory within the archive. This is in a
    -- tar-specific form. Use 'entryPath' to get a native 'FilePath'.
    entryTarPath :: {-# UNPACK #-} !TarPath,

    -- | The real content of the entry. For 'NormalFile' this includes the
    -- file data. An entry usually contains a 'NormalFile' or a 'Directory'.
    entryContent :: !EntryContent,

    -- | File permissions (Unix style file mode).
    entryPermissions :: {-# UNPACK #-} !Permissions,

    -- | The user and group to which this file belongs.
    entryOwnership :: {-# UNPACK #-} !Ownership,

    -- | The time the file was last modified.
    entryTime :: {-# UNPACK #-} !EpochTime,

    -- | The tar format the archive is using.
    entryFormat :: !Format
  }
  deriving (Eq, Show)

-- | Native 'FilePath' of the file or directory within the archive.
--
entryPath :: Entry -> FilePath
entryPath = fromTarPath . entryTarPath

-- | The content of a tar archive entry, which depends on the type of entry.
--
-- Portable archives should contain only 'NormalFile' and 'Directory'.
--
data EntryContent = NormalFile      LBS.ByteString {-# UNPACK #-} !FileSize
                  | Directory
                  | SymbolicLink    !LinkTarget
                  | HardLink        !LinkTarget
                  | CharacterDevice {-# UNPACK #-} !DevMajor
                                    {-# UNPACK #-} !DevMinor
                  | BlockDevice     {-# UNPACK #-} !DevMajor
                                    {-# UNPACK #-} !DevMinor
                  | NamedPipe
                  | OtherEntryType  {-# UNPACK #-} !TypeCode LBS.ByteString
                                    {-# UNPACK #-} !FileSize
  deriving (Eq, Ord, Show)

data Ownership = Ownership {
    -- | The owner user name. Should be set to @\"\"@ if unknown.
    ownerName :: String,

    -- | The owner group name. Should be set to @\"\"@ if unknown.
    groupName :: String,

    -- | Numeric owner user id. Should be set to @0@ if unknown.
    ownerId :: {-# UNPACK #-} !Int,

    -- | Numeric owner group id. Should be set to @0@ if unknown.
    groupId :: {-# UNPACK #-} !Int
  }
  deriving (Eq, Ord, Show)

-- | There have been a number of extensions to the tar file format over the
-- years. They all share the basic entry fields and put more meta-data in
-- different extended headers.
--
data Format =

     -- | This is the classic Unix V7 tar format. It does not support owner and
     -- group names, just numeric Ids. It also does not support device numbers.
     V7Format

     -- | The \"USTAR\" format is an extension of the classic V7 format. It was
     -- later standardised by POSIX. It has some restrictions but is the most
     -- portable format.
     --
   | UstarFormat

     -- | The GNU tar implementation also extends the classic V7 format, though
     -- in a slightly different way from the USTAR format. In general for new
     -- archives the standard USTAR/POSIX should be used.
     --
   | GnuFormat
  deriving (Eq, Ord, Show)

instance NFData Entry where
  -- all other fields are strict (and unpacked) by construction;
  -- only the content holds a lazy bytestring
  rnf (Entry _ c _ _ _ _) = rnf c

instance NFData EntryContent where
  rnf x = case x of
      NormalFile       c _ -> rnflbs c
      OtherEntryType _ c _ -> rnflbs c
      _                    -> seq x ()
    where
#if MIN_VERSION_bytestring(0,10,0)
      rnflbs = rnf
#else
      -- older bytestring lacks an NFData instance; forcing the chunk spine
      -- suffices since the chunks themselves are strict
      rnflbs = foldr (\ !_bs r -> r) () . LBS.toChunks
#endif

instance NFData Ownership where
  rnf (Ownership o g _ _) = rnf o `seq` rnf g

-- | @rw-r--r--@ for normal files
ordinaryFilePermissions :: Permissions
ordinaryFilePermissions = 0o0644

-- | @rwxr-xr-x@ for executable files
executableFilePermissions :: Permissions
executableFilePermissions = 0o0755

-- | @rwxr-xr-x@ for directories
directoryPermissions :: Permissions
directoryPermissions = 0o0755

-- | An 'Entry' with all default values except for the file name and type. It
-- uses the portable USTAR/POSIX format (see 'UstarFormat').
--
-- You can use this as a basis and override specific fields, eg:
--
-- > (simpleEntry name (HardLink target)) { entryOwnership = owner }
--
simpleEntry :: TarPath -> EntryContent -> Entry
simpleEntry tarpath content = Entry {
    entryTarPath     = tarpath,
    entryContent     = content,
    entryPermissions = case content of
                         Directory -> directoryPermissions
                         _         -> ordinaryFilePermissions,
    entryOwnership   = Ownership "" "" 0 0,
    entryTime        = 0,
    entryFormat      = UstarFormat
  }

-- | A tar 'Entry' for a file.
--
-- Entry fields such as file permissions and ownership have default values.
--
-- You can use this as a basis and override specific fields. For example if you
-- need an executable file you could use:
--
-- > (fileEntry name content) { entryPermissions = executableFilePermissions }
--
fileEntry :: TarPath -> LBS.ByteString -> Entry
fileEntry name fileContent =
  simpleEntry name (NormalFile fileContent (LBS.length fileContent))

-- | A tar 'Entry' for a directory.
--
-- Entry fields such as file permissions and ownership have default values.
--
directoryEntry :: TarPath -> Entry
directoryEntry name = simpleEntry name Directory

--
-- * Tar paths
--

-- | The classic tar format allowed just 100 characters for the file name. The
-- USTAR format extended this with an extra 155 characters, however it uses a
-- complex method of splitting the name between the two sections.
--
-- Instead of just putting any overflow into the extended area, it uses the
-- extended area as a prefix. The aggravating insane bit however is that the
-- prefix (if any) must only contain a directory prefix. That is the split
-- between the two areas must be on a directory separator boundary. So there is
-- no simple calculation to work out if a file name is too long. Instead we
-- have to try to find a valid split that makes the name fit in the two areas.
--
-- The rationale presumably was to make it a bit more compatible with old tar
-- programs that only understand the classic format. A classic tar would be
-- able to extract the file name and possibly some dir prefix, but not the
-- full dir prefix. So the files would end up in the wrong place, but that's
-- probably better than ending up with the wrong names too.
--
-- So it's understandable but rather annoying.
--
-- * Tar paths use Posix format (ie @\'/\'@ directory separators), irrespective
--   of the local path conventions.
--
-- * The directory separator between the prefix and name is /not/ stored.
--
data TarPath = TarPath {-# UNPACK #-} !BS.ByteString -- path name, 100 characters max.
                       {-# UNPACK #-} !BS.ByteString -- path prefix, 155 characters max.
  deriving (Eq, Ord)

instance NFData TarPath where
  rnf (TarPath _ _) = () -- fully strict by construction

instance Show TarPath where
  show = show . fromTarPath

-- | Convert a 'TarPath' to a native 'FilePath'.
--
-- The native 'FilePath' will use the native directory separator but it is not
-- otherwise checked for validity or sanity. In particular:
--
-- * The tar path may be invalid as a native path, eg the file name @\"nul\"@
--   is not valid on Windows.
--
-- * The tar path may be an absolute path or may contain @\"..\"@ components.
--   For security reasons this should not usually be allowed, but it is your
--   responsibility to check for these conditions (eg using 'checkSecurity').
--
fromTarPath :: TarPath -> FilePath
fromTarPath (TarPath namebs prefixbs) = adjustDirectory $
    FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix
                            ++ FilePath.Posix.splitDirectories name
  where
    name   = BS.Char8.unpack namebs
    prefix = BS.Char8.unpack prefixbs
    -- a trailing '/' on the name marks a directory entry; re-add it natively
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
                    = FilePath.Native.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a 'TarPath' to a Unix\/Posix 'FilePath'.
--
-- The difference compared to 'fromTarPath' is that it always returns a Unix
-- style path irrespective of the current operating system.
--
-- This is useful to check how a 'TarPath' would be interpreted on a specific
-- operating system, eg to perform portability checks.
--
fromTarPathToPosixPath :: TarPath -> FilePath
fromTarPathToPosixPath (TarPath namebs prefixbs) = adjustDirectory $
    FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories prefix
                           ++ FilePath.Posix.splitDirectories name
  where
    name   = BS.Char8.unpack namebs
    prefix = BS.Char8.unpack prefixbs
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
                    = FilePath.Posix.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a 'TarPath' to a Windows 'FilePath'.
--
-- The only difference compared to 'fromTarPath' is that it always returns a
-- Windows style path irrespective of the current operating system.
--
-- This is useful to check how a 'TarPath' would be interpreted on a specific
-- operating system, eg to perform portability checks.
--
fromTarPathToWindowsPath :: TarPath -> FilePath
fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $
    FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories prefix
                             ++ FilePath.Posix.splitDirectories name
  where
    name   = BS.Char8.unpack namebs
    prefix = BS.Char8.unpack prefixbs
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
                    = FilePath.Windows.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a native 'FilePath' to a 'TarPath'.
--
-- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a
-- description of the problem with splitting long 'FilePath's.
--
toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for
                  -- directories a 'TarPath' must always use a trailing @\/@.
          -> FilePath -> Either String TarPath
toTarPath isDir = splitLongPath
                . addTrailingSep
                . FilePath.Posix.joinPath
                . FilePath.Native.splitDirectories
  where
    addTrailingSep | isDir     = FilePath.Posix.addTrailingPathSeparator
                   | otherwise = id

-- | Take a sanitised path, split on directory separators and try to pack it
-- into the 155 + 100 tar file name format.
--
-- The strategy is this: take the name-directory components in reverse order
-- and try to fit as many components into the 100 long name area as possible.
-- If all the remaining components fit in the 155 name area then we win.
--
splitLongPath :: FilePath -> Either String TarPath
splitLongPath path =
    case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
      Left err                 -> Left err
      Right (name, [])         -> Right $! TarPath (BS.Char8.pack name)
                                                   BS.empty
      Right (name, first:rest) -> case packName prefixMax remainder of
        Left err               -> Left err
        -- something still left over: no valid split exists
        Right (_     , (_:_)) -> Left "File name too long (cannot split)"
        Right (prefix, [])    -> Right $! TarPath (BS.Char8.pack name)
                                                  (BS.Char8.pack prefix)
        where
          -- drop the '/' between the name and prefix:
          remainder = init first : rest

  where
    nameMax, prefixMax :: Int
    nameMax   = 100
    prefixMax = 155

    -- greedily pack components (given in reverse order) into an area of
    -- maxLen characters; returns the packed path and the leftover components
    packName _      []     = Left "File name empty"
    packName maxLen (c:cs)
      | n > maxLen         = Left "File name too long"
      | otherwise          = Right (packName' maxLen n [c] cs)
      where n = length c

    packName' maxLen n ok (c:cs)
      | n' <= maxLen             = packName' maxLen n' (c:ok) cs
                                     where n' = n + length c
    packName' _      _ ok    cs  = (FilePath.Posix.joinPath ok, cs)

-- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and
-- 'HardLink' entry types.
--
newtype LinkTarget = LinkTarget BS.ByteString
  deriving (Eq, Ord, Show)

instance NFData LinkTarget where
#if MIN_VERSION_bytestring(0,10,0)
  rnf (LinkTarget bs) = rnf bs
#else
  rnf (LinkTarget !_bs) = ()
#endif

-- | Convert a native 'FilePath' to a tar 'LinkTarget'. This may fail if the
-- string is longer than 100 characters or if it contains non-portable
-- characters.
--
toLinkTarget :: FilePath -> Maybe LinkTarget
toLinkTarget path | length path <= 100 = Just $! LinkTarget (BS.Char8.pack path)
                  | otherwise          = Nothing

-- | Convert a tar 'LinkTarget' to a native 'FilePath'.
--
fromLinkTarget :: LinkTarget -> FilePath
fromLinkTarget (LinkTarget pathbs) = adjustDirectory $
    FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path
  where
    path = BS.Char8.unpack pathbs
    -- preserve a trailing separator, converted to the native convention
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
                    = FilePath.Native.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a tar 'LinkTarget' to a Unix/Posix 'FilePath'.
--
fromLinkTargetToPosixPath :: LinkTarget -> FilePath
fromLinkTargetToPosixPath (LinkTarget pathbs) = adjustDirectory $
    FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories path
  where
    path = BS.Char8.unpack pathbs
    -- Bug fix: this previously used FilePath.Native.addTrailingPathSeparator,
    -- which on Windows appends '\\' to what must be a Posix-style result.
    -- The sibling fromTarPathToPosixPath correctly uses the Posix variant.
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
                    = FilePath.Posix.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a tar 'LinkTarget' to a Windows 'FilePath'.
--
fromLinkTargetToWindowsPath :: LinkTarget -> FilePath
fromLinkTargetToWindowsPath (LinkTarget pathbs) = adjustDirectory $
    FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories path
  where
    path = BS.Char8.unpack pathbs
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
                    = FilePath.Windows.addTrailingPathSeparator
                    | otherwise = id

--
-- * Entries type
--

-- | A tar archive is a sequence of entries.
--
-- The point of this type as opposed to just using a list is that it makes the
-- failure case explicit. We need this because the sequence of entries we get
-- from reading a tarball can include errors.
--
-- It is a concrete data type so you can manipulate it directly but it is often
-- clearer to use the provided functions for mapping, folding and unfolding.
--
-- Converting from a list can be done with just @foldr Next Done@.
-- Converting back into a list can be done with 'foldEntries' however in that
-- case you must be prepared to handle the 'Fail' case inherent in the
-- 'Entries' type.
--
-- The 'Monoid' instance lets you concatenate archives or append entries to an
-- archive.
--
data Entries e = Next Entry (Entries e)
               | Done
               | Fail e
  deriving (Eq, Show)

infixr 5 `Next`

-- | This is like the standard 'unfoldr' function on lists, but for 'Entries'.
-- It includes failure as an extra possibility that the stepper function may
-- return.
--
-- It can be used to generate 'Entries' from some other type. For example it is
-- used internally to lazily unfold entries from a 'LBS.ByteString'.
--
unfoldEntries :: (a -> Either e (Maybe (Entry, a))) -> a -> Entries e
unfoldEntries step = go
  where
    go seed =
      case step seed of
        Left err                -> Fail err
        Right Nothing           -> Done
        Right (Just (e, seed')) -> Next e (go seed')

-- | This is like the standard 'foldr' function on lists, but for 'Entries'.
-- Compared to 'foldr' it takes an extra function to account for the
-- possibility of failure.
--
-- This is used to consume a sequence of entries. For example it could be used
-- to scan a tarball for problems or to collect an index of the contents.
--
foldEntries :: (Entry -> a -> a) -> a -> (e -> a) -> Entries e -> a
foldEntries onEntry onDone onFail = go
  where
    go (Next e es) = onEntry e (go es)
    go Done        = onDone
    go (Fail err)  = onFail err

-- | A 'foldl'-like function on Entries. It either returns the final
-- accumulator result, or the failure along with the intermediate accumulator
-- value.
--
foldlEntries :: (a -> Entry -> a) -> a -> Entries e -> Either (e, a) a
foldlEntries f z = go z
  where
    -- strict accumulator to avoid a thunk chain over long archives
    go !acc (Next e es) = go (f acc e) es
    go !acc  Done       = Right acc
    go !acc (Fail err)  = Left (err, acc)

-- | This is like the standard 'map' function on lists, but for 'Entries'. It
-- includes failure as an extra possible outcome of the mapping function.
--
-- If your mapping function cannot fail it may be more convenient to use
-- 'mapEntriesNoFail'
mapEntries :: (Entry -> Either e' Entry) -> Entries e -> Entries (Either e e')
mapEntries f =
  foldEntries (\entry rest -> either (Fail . Right) (flip Next rest) (f entry))
              Done
              (Fail . Left)

-- | Like 'mapEntries' but the mapping function itself cannot fail.
--
mapEntriesNoFail :: (Entry -> Entry) -> Entries e -> Entries e
mapEntriesNoFail f =
  foldEntries (\entry -> Next (f entry)) Done Fail

#if MIN_VERSION_base(4,11,0)
-- BUG FIX: since base-4.11 (GHC 8.4) 'Semigroup' is a superclass of
-- 'Monoid', so the 'Monoid' instance below no longer compiles without a
-- 'Semigroup' instance. 'Semigroup' is in the Prelude from base-4.11, so
-- no extra import is needed inside this guard.
instance Semigroup (Entries e) where
  a <> b = foldEntries Next b Fail a
#endif

instance Monoid (Entries e) where
  mempty  = Done
#if !(MIN_VERSION_base(4,11,0))
  -- On older base, define 'mappend' directly; on newer base it defaults to
  -- '(<>)' from the Semigroup instance above.
  mappend a b = foldEntries Next b Fail a
#endif

instance Functor Entries where
  fmap f = foldEntries Next Done (Fail . f)
f) 544 | 545 | instance NFData e => NFData (Entries e) where 546 | rnf (Next e es) = rnf e `seq` rnf es 547 | rnf Done = () 548 | rnf (Fail e) = rnf e 549 | 550 | 551 | ------------------------- 552 | -- QuickCheck instances 553 | -- 554 | 555 | #ifdef TESTS 556 | 557 | instance Arbitrary Entry where 558 | arbitrary = Entry <$> arbitrary <*> arbitrary <*> arbitraryPermissions 559 | <*> arbitrary <*> arbitraryEpochTime <*> arbitrary 560 | where 561 | arbitraryPermissions :: Gen Permissions 562 | arbitraryPermissions = fromIntegral <$> (arbitraryOctal 7 :: Gen Int) 563 | 564 | arbitraryEpochTime :: Gen EpochTime 565 | arbitraryEpochTime = fromIntegral <$> (arbitraryOctal 11 :: Gen Int) 566 | 567 | shrink (Entry path content perms author time format) = 568 | [ Entry path' content' perms author' time' format 569 | | (path', content', author', time') <- 570 | shrink (path, content, author, time) ] 571 | ++ [ Entry path content perms' author time format 572 | | perms' <- shrinkIntegral perms ] 573 | 574 | instance Arbitrary TarPath where 575 | arbitrary = either error id 576 | . toTarPath False 577 | . FilePath.Posix.joinPath 578 | <$> listOf1ToN (255 `div` 5) 579 | (elements (map (replicate 4) "abcd")) 580 | 581 | shrink = map (either error id . toTarPath False) 582 | . map FilePath.Posix.joinPath 583 | . filter (not . null) 584 | . shrinkList shrinkNothing 585 | . FilePath.Posix.splitPath 586 | . fromTarPathToPosixPath 587 | 588 | instance Arbitrary LinkTarget where 589 | arbitrary = maybe (error "link target too large") id 590 | . toLinkTarget 591 | . FilePath.Native.joinPath 592 | <$> listOf1ToN (100 `div` 5) 593 | (elements (map (replicate 4) "abcd")) 594 | 595 | shrink = map (maybe (error "link target too large") id . toLinkTarget) 596 | . map FilePath.Posix.joinPath 597 | . filter (not . null) 598 | . shrinkList shrinkNothing 599 | . FilePath.Posix.splitPath 600 | . 
-- | Generate a list of between 1 and @limit@ elements, also capped by the
-- current QuickCheck size parameter.
listOf1ToN :: Int -> Gen a -> Gen [a]
listOf1ToN limit gen = sized $ \sz -> do
    len <- choose (1, min limit (max 1 sz))
    vectorOf len gen

-- | Generate a list of between 0 and @limit@ elements, also capped by the
-- current QuickCheck size parameter.
listOf0ToN :: Int -> Gen a -> Gen [a]
listOf0ToN limit gen = sized $ \sz -> do
    len <- choose (0, min limit sz)
    vectorOf len gen

instance Arbitrary EntryContent where
  -- Biased heavily towards ordinary files; device numbers fit in 7 octal
  -- digits as they would on disk.
  arbitrary =
    frequency
      [ (16, do bs <- arbitrary
                return (NormalFile bs (LBS.length bs)))
      , (2,  pure Directory)
      , (1,  SymbolicLink    <$> arbitrary)
      , (1,  HardLink        <$> arbitrary)
      , (1,  CharacterDevice <$> arbitraryOctal 7 <*> arbitraryOctal 7)
      , (1,  BlockDevice     <$> arbitraryOctal 7 <*> arbitraryOctal 7)
      , (1,  pure NamedPipe)
      , (1,  do c  <- elements (['A'..'Z']++['a'..'z'])
                bs <- arbitrary
                return (OtherEntryType c bs (LBS.length bs)))
      ]

  -- Shrinking keeps the stored length consistent with the shrunk content.
  shrink (NormalFile bs _)       = [ NormalFile bs' (LBS.length bs')
                                   | bs' <- shrink bs ]
  shrink  Directory              = []
  shrink (SymbolicLink link)     = [ SymbolicLink link' | link' <- shrink link ]
  shrink (HardLink link)         = [ HardLink link' | link' <- shrink link ]
  shrink (CharacterDevice ma mi) = [ CharacterDevice ma' mi'
                                   | (ma', mi') <- shrink (ma, mi) ]
  shrink (BlockDevice ma mi)     = [ BlockDevice ma' mi'
                                   | (ma', mi') <- shrink (ma, mi) ]
  shrink  NamedPipe              = []
  shrink (OtherEntryType c bs _) = [ OtherEntryType c bs' (LBS.length bs')
                                   | bs' <- shrink bs ]

instance Arbitrary LBS.ByteString where
  arbitrary = LBS.pack <$> arbitrary
  shrink    = map LBS.pack . shrink . LBS.unpack

instance Arbitrary BS.ByteString where
  arbitrary = BS.pack <$> arbitrary
  shrink    = map BS.pack . shrink . BS.unpack
instance Arbitrary Ownership where
  arbitrary = Ownership <$> name <*> name
                        <*> idno <*> idno
    where
      -- Names are at most 32 chars and may not contain NUL, which is the
      -- field terminator in the on-disk format.
      name = listOf0ToN 32 (arbitrary `suchThat` (/= '\0'))
      idno = arbitraryOctal 7

  shrink (Ownership oname gname oid gid) =
    [ Ownership oname' gname' oid' gid'
    | (oname', gname', oid', gid') <- shrink (oname, gname, oid, gid) ]

instance Arbitrary Format where
  arbitrary = elements [V7Format, UstarFormat, GnuFormat]


-- Generates a value that fits in an n-digit octal field, biased towards the
-- boundary cases 0 and the maximum.
-- Signature left commented out as in the original; presumably to let the
-- result type default at call sites — TODO confirm before uncommenting.
--arbitraryOctal :: (Integral n, Random n) => Int -> Gen n
arbitraryOctal n =
    oneof [ pure 0
          , choose (0, upperBound)
          , pure upperBound
          ]
  where
    upperBound = 8^n-1

-- For QC tests it's useful to have a way to limit the info to that which can
-- be expressed in the old V7 format
limitToV7FormatCompat :: Entry -> Entry
limitToV7FormatCompat entry@Entry { entryFormat = V7Format } =
    entry {
      -- V7 has no special entry types; map them to OtherEntryType codes.
      entryContent = case entryContent entry of
        CharacterDevice _ _ -> OtherEntryType  '3' LBS.empty 0
        BlockDevice     _ _ -> OtherEntryType  '4' LBS.empty 0
        Directory           -> OtherEntryType  '5' LBS.empty 0
        NamedPipe           -> OtherEntryType  '6' LBS.empty 0
        other               -> other,

      -- V7 has no owner/group name fields.
      entryOwnership = (entryOwnership entry) {
        groupName = "",
        ownerName = ""
      },

      -- V7 has no prefix field, so keep just the name part of the path.
      entryTarPath = let TarPath name _prefix = entryTarPath entry
                      in TarPath name BS.empty
    }
limitToV7FormatCompat entry = entry

#endif

-- /Codec/Archive/Tar/Index.hs

{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}

-----------------------------------------------------------------------------
-- |
-- Module      :
Codec.Archive.Tar.Index 7 | -- Copyright : (c) 2010-2015 Duncan Coutts 8 | -- License : BSD3 9 | -- 10 | -- Maintainer : duncan@community.haskell.org 11 | -- Portability : portable 12 | -- 13 | -- Random access to the content of a @.tar@ archive. 14 | -- 15 | -- This module uses common names and so is designed to be imported qualified: 16 | -- 17 | -- > import qualified Codec.Archive.Tar.Index as TarIndex 18 | -- 19 | ----------------------------------------------------------------------------- 20 | module Codec.Archive.Tar.Index ( 21 | 22 | -- | The @tar@ format does not contain an index of files within the 23 | -- archive. Normally, @tar@ file have to be processed linearly. It is 24 | -- sometimes useful however to be able to get random access to files 25 | -- within the archive. 26 | -- 27 | -- This module provides an index of a @tar@ file. A linear pass of the 28 | -- @tar@ file is needed to 'build' the 'TarIndex', but thereafter you can 29 | -- 'lookup' paths in the @tar@ file, and then use 'hReadEntry' to 30 | -- seek to the right part of the file and read the entry. 31 | -- 32 | -- An index cannot be used to lookup 'Directory' entries in a tar file; 33 | -- instead, you will get 'TarDir' entry listing all the entries in the 34 | -- directory. 
35 | 36 | -- * Index type 37 | TarIndex, 38 | 39 | -- * Index lookup 40 | lookup, 41 | TarIndexEntry(..), 42 | toList, 43 | 44 | -- ** I\/O operations 45 | TarEntryOffset, 46 | hReadEntry, 47 | hReadEntryHeader, 48 | 49 | -- * Index construction 50 | build, 51 | -- ** Incremental construction 52 | -- $incremental-construction 53 | IndexBuilder, 54 | empty, 55 | addNextEntry, 56 | skipNextEntry, 57 | finalise, 58 | unfinalise, 59 | 60 | -- * Serialising indexes 61 | serialise, 62 | deserialise, 63 | 64 | -- * Lower level operations with offsets and I\/O on tar files 65 | hReadEntryHeaderOrEof, 66 | hSeekEntryOffset, 67 | hSeekEntryContentOffset, 68 | hSeekEndEntryOffset, 69 | nextEntryOffset, 70 | indexEndEntryOffset, 71 | indexNextEntryOffset, 72 | 73 | -- * Deprecated aliases 74 | emptyIndex, 75 | finaliseIndex, 76 | 77 | #ifdef TESTS 78 | prop_lookup, 79 | prop_toList, 80 | prop_valid, 81 | prop_serialise_deserialise, 82 | prop_serialiseSize, 83 | prop_index_matches_tar, 84 | prop_finalise_unfinalise, 85 | #endif 86 | ) where 87 | 88 | import Data.Typeable (Typeable) 89 | 90 | import Codec.Archive.Tar.Types as Tar 91 | import Codec.Archive.Tar.Read as Tar 92 | import qualified Codec.Archive.Tar.Index.StringTable as StringTable 93 | import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder) 94 | import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie 95 | import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder) 96 | 97 | import qualified System.FilePath.Posix as FilePath 98 | import Data.Monoid (Monoid(..)) 99 | #if (MIN_VERSION_base(4,5,0)) 100 | import Data.Monoid ((<>)) 101 | #endif 102 | import Data.Word 103 | import Data.Int 104 | import Data.Bits 105 | import qualified Data.Array.Unboxed as A 106 | import Prelude hiding (lookup) 107 | import System.IO 108 | import Control.Exception (assert, throwIO) 109 | import Control.DeepSeq 110 | 111 | import qualified Data.ByteString as BS 112 | import qualified Data.ByteString.Char8 as 
import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString.Unsafe as BS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder       as BS
import Data.ByteString.Builder.Extra as BS (toLazyByteStringWith,
                                            untrimmedStrategy)
#else
import Data.ByteString.Lazy.Builder        as BS
import Data.ByteString.Lazy.Builder.Extras as BS (toLazyByteStringWith,
                                                  untrimmedStrategy)
#endif

#ifdef TESTS
import qualified Prelude
import Test.QuickCheck
import Test.QuickCheck.Property (ioProperty)
import Control.Applicative ((<$>), (<*>))
import Control.Monad (unless)
import Data.List (nub, sort, sortBy, stripPrefix, isPrefixOf)
import Data.Maybe
import Data.Function (on)
import Control.Exception (SomeException, try)
import Codec.Archive.Tar.Write as Tar
import qualified Data.ByteString.Handle as HBS
#endif


-- | An index of the entries in a tar file.
--
-- This index type is designed to be quite compact and suitable to store either
-- on disk or in memory.
--
data TarIndex = TarIndex

  -- As an example of how the mapping works, consider these example files:
  --   "foo/bar.hs" at offset 0
  --   "foo/baz.hs" at offset 1024
  --
  -- We split the paths into components and enumerate them.
  --   { "foo" -> TokenId 0, "bar.hs" -> TokenId 1, "baz.hs" -> TokenId 2 }
  --
  -- We convert paths into sequences of 'TokenId's, i.e.
  --   "foo/bar.hs" becomes [PathComponentId 0, PathComponentId 1]
  --   "foo/baz.hs" becomes [PathComponentId 0, PathComponentId 2]
  --
  -- We use a trie mapping sequences of 'PathComponentId's to the entry offset:
  --  { [PathComponentId 0, PathComponentId 1] -> offset 0
  --  , [PathComponentId 0, PathComponentId 2] -> offset 1024 }

  -- The mapping of filepath components as strings to ids.
  {-# UNPACK #-} !(StringTable PathComponentId)

  -- Mapping of sequences of filepath component ids to tar entry offsets.
  {-# UNPACK #-} !(IntTrie PathComponentId TarEntryOffset)

  -- The offset immediately after the last entry, where we would append any
  -- additional entries.
  {-# UNPACK #-} !TarEntryOffset

  deriving (Eq, Show, Typeable)

instance NFData TarIndex where
  rnf (TarIndex _ _ _) = () -- fully strict by construction

-- | The result of 'lookup' in a 'TarIndex'. It can either be a file directly,
-- or a directory entry containing further entries (and all subdirectories
-- recursively). Note that the subtrees are constructed lazily, so it's
-- cheaper if you don't look at them.
--
data TarIndexEntry = TarFileEntry {-# UNPACK #-} !TarEntryOffset
                   | TarDir [(FilePath, TarIndexEntry)]
  deriving (Show, Typeable)


-- Internal: dense numbering of distinct path components, used as keys in
-- both the string table and the trie.
newtype PathComponentId = PathComponentId Int
  deriving (Eq, Ord, Enum, Show, Typeable)

-- | An offset within a tar file. Use 'hReadEntry', 'hReadEntryHeader' or
-- 'hSeekEntryOffset'.
--
-- This is actually a tar \"record\" number, not a byte offset.
--
type TarEntryOffset = Word32
-- | Look up a given filepath in the 'TarIndex'. It may return a 'TarFileEntry'
-- containing the 'TarEntryOffset' of the file within the tar file, or if
-- the filepath identifies a directory then it returns a 'TarDir' containing
-- the list of files within that directory.
--
-- Given the 'TarEntryOffset' you can then use one of the I\/O operations:
--
-- * 'hReadEntry' to read the whole entry;
--
-- * 'hReadEntryHeader' to read just the file metadata (e.g. its length);
--
lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry
lookup (TarIndex strTable trie _) path =
    fmap convert (IntTrie.lookup trie =<< toComponentIds strTable path)
  where
    -- Translate the trie's view of a result back into filepath terms.
    convert (IntTrie.Entry offset)       = TarFileEntry offset
    convert (IntTrie.Completions compls) =
      TarDir [ (fromComponentId strTable key, convert entry)
             | (key, entry) <- compls ]


-- Translate a filepath into the sequence of component ids used by the trie,
-- or Nothing if any component is not in the string table (in which case the
-- path cannot be in the index at all). "." components are dropped, matching
-- how paths are stored.
toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId]
toComponentIds table =
    mapM (StringTable.lookup table)
  . filter (/= BS.Char8.singleton '.')
  . splitDirectories
  . BS.Char8.pack

-- Inverse of 'toComponentIds' for a single component.
fromComponentId :: StringTable PathComponentId -> PathComponentId -> FilePath
fromComponentId table = BS.Char8.unpack . StringTable.index table

-- | All the files in the index with their corresponding 'TarEntryOffset's.
--
-- Note that the files are in no special order. If you intend to read all or
-- most files then it is recommended to sort by the 'TarEntryOffset'.
--
toList :: TarIndex -> [(FilePath, TarEntryOffset)]
toList (TarIndex pathTable pathTrie _) =
    [ (path, off)
    | (cids, off) <- IntTrie.toList pathTrie
    , let path = FilePath.joinPath (map (fromComponentId pathTable) cids) ]


-- | Build a 'TarIndex' from a sequence of tar 'Entries'. The 'Entries' are
-- assumed to start at offset @0@ within a file.
--
build :: Entries e -> Either e TarIndex
build = go empty
  where
    -- Strict accumulator: force the builder at each step so we don't pile
    -- up thunks over a large archive.
    go !builder (Next e es) = go (addNextEntry e builder) es
    go !builder Done        = Right $! finalise builder
    go !_       (Fail err)  = Left err


-- $incremental-construction
-- If you need more control than 'build' then you can construct the index
-- in an accumulator style using the 'IndexBuilder' and operations.
--
-- Start with 'empty' and use 'addNextEntry' (or 'skipNextEntry') for
-- each 'Entry' in the tar file in order. Every entry must be added or skipped
-- in order, otherwise the resulting 'TarIndex' will report the wrong
-- 'TarEntryOffset's. At the end use 'finalise' to get the 'TarIndex'.
--
-- For example, 'build' is simply:
--
-- > build = go empty
-- >   where
-- >     go !builder (Next e es) = go (addNextEntry e builder) es
-- >     go !builder Done        = Right $! finalise builder
-- >     go !_       (Fail err)  = Left err


-- | The intermediate type used for incremental construction of a 'TarIndex'.
--
data IndexBuilder
   = IndexBuilder !(StringTableBuilder PathComponentId)
                  !(IntTrieBuilder PathComponentId TarEntryOffset)
   {-# UNPACK #-} !TarEntryOffset
  deriving (Eq, Show)

instance NFData IndexBuilder where
  rnf (IndexBuilder _ _ _) = () -- fully strict by construction

-- | The initial empty 'IndexBuilder'.
--
empty :: IndexBuilder
empty = IndexBuilder StringTable.empty IntTrie.empty 0

emptyIndex :: IndexBuilder
emptyIndex = empty
{-# DEPRECATED emptyIndex "Use TarIndex.empty" #-}

-- | Add the next 'Entry' into the 'IndexBuilder'.
--
addNextEntry :: Entry -> IndexBuilder -> IndexBuilder
addNextEntry entry (IndexBuilder strTbl trie offset) =
    IndexBuilder strTbl' trie' offset'
  where
    -- Record this entry's path against its own offset, then advance the
    -- offset past this entry for the next one.
    !components     = splitTarPath (entryTarPath entry)
    (strTbl', cids) = StringTable.inserts components strTbl
    trie'           = IntTrie.insert cids offset trie
    offset'         = nextEntryOffset entry offset

-- | Use this function if you want to skip some entries and not add them to the
-- final 'TarIndex'.
--
skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder
skipNextEntry entry (IndexBuilder strTbl trie offset) =
    IndexBuilder strTbl trie (nextEntryOffset entry offset)

-- | Finish accumulating 'Entry' information and build the compact 'TarIndex'
-- lookup structure.
--
finalise :: IndexBuilder -> TarIndex
finalise (IndexBuilder strTbl trie finalOffset) =
    TarIndex (StringTable.finalise strTbl)
             (IntTrie.finalise trie)
             finalOffset

finaliseIndex :: IndexBuilder -> TarIndex
finaliseIndex = finalise
{-# DEPRECATED finaliseIndex "Use TarIndex.finalise" #-}

-- | This is the offset immediately following the entry most recently added
-- to the 'IndexBuilder'. You might use this if you need to know the offsets
-- but don't want to use the 'TarIndex' lookup structure.
-- Use with 'hSeekEntryOffset'. See also 'nextEntryOffset'.
--
indexNextEntryOffset :: IndexBuilder -> TarEntryOffset
indexNextEntryOffset (IndexBuilder _ _ off) = off

-- | This is the offset immediately following the last entry in the tar file.
-- This can be useful to append further entries into the tar file.
-- Use with 'hSeekEntryOffset', or just use 'hSeekEndEntryOffset' directly.
--
indexEndEntryOffset :: TarIndex -> TarEntryOffset
indexEndEntryOffset (TarIndex _ _ off) = off

-- | Calculate the 'TarEntryOffset' of the next entry, given the size and
-- offset of the current entry.
--
-- This is much like using 'skipNextEntry' and 'indexNextEntryOffset', but
-- without using an 'IndexBuilder'.
--
nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset
nextEntryOffset entry offset =
    offset + 1 + contentBlocks   -- the 1 is the entry's own header block
  where
    contentBlocks = case entryContent entry of
      NormalFile     _   size -> blocks size
      OtherEntryType _ _ size -> blocks size
      _                       -> 0

    -- Number of 512-byte blocks, rounded up.
    -- NOTE: to avoid underflow, do the (fromIntegral :: Int64 -> Word32)
    -- last — (size - 1) would wrap for size == 0 if done in Word32.
    blocks :: Int64 -> TarEntryOffset
    blocks size = fromIntegral (1 + (size - 1) `div` 512)

type FilePathBS = BS.ByteString

-- All the path components of an entry: prefix field first, then name field.
splitTarPath :: TarPath -> [FilePathBS]
splitTarPath (TarPath name prefix) =
    splitDirectories prefix ++ splitDirectories name

-- Split on '/', dropping empty components, but keeping a leading '/' as its
-- own component for absolute paths.
splitDirectories :: FilePathBS -> [FilePathBS]
splitDirectories bs =
    case BS.Char8.split '/' bs of
      c:cs | BS.null c -> BS.Char8.singleton '/' : dropEmpty cs
      cs               -> dropEmpty cs
  where
    dropEmpty = filter (not . BS.null)


-------------------------
-- Resume building an existing index
--

-- | Resume building an existing index
--
-- A 'TarIndex' is optimized for a highly compact and efficient in-memory
-- representation. This, however, makes it read-only.
-- If you have an existing
-- 'TarIndex' for a large file, and want to add to it, you can translate the
-- 'TarIndex' back to an 'IndexBuilder'. Be aware that this is a relatively
-- costly operation (linear in the size of the 'TarIndex'), though still
-- faster than starting again from scratch.
--
-- This is the left inverse to 'finalise' (modulo ordering).
--
unfinalise :: TarIndex -> IndexBuilder
unfinalise (TarIndex pathTable pathTrie finalOffset) =
    IndexBuilder (StringTable.unfinalise pathTable)
                 (IntTrie.unfinalise pathTrie)
                 finalOffset


-------------------------
-- I/O operations
--

-- | Reads an entire 'Entry' at the given 'TarEntryOffset' in the tar file.
-- The 'Handle' must be open for reading and be seekable.
--
-- This reads the whole entry into memory strictly, not incrementally. For more
-- control, use 'hReadEntryHeader' and then read the entry content manually.
--
hReadEntry :: Handle -> TarEntryOffset -> IO Entry
hReadEntry hnd off = do
    entry <- hReadEntryHeader hnd off
    -- hReadEntryHeader leaves the handle at the start of the content, so
    -- fill in the real body for the content-carrying entry types.
    case entryContent entry of
      NormalFile _ size       -> do
        body <- readBody size
        return entry { entryContent = NormalFile body size }
      OtherEntryType c _ size -> do
        body <- readBody size
        return entry { entryContent = OtherEntryType c body size }
      _                       -> return entry
  where
    readBody size = LBS.hGet hnd (fromIntegral size)

-- | Read the header for a 'Entry' at the given 'TarEntryOffset' in the tar
-- file. The 'entryContent' will contain the correct metadata but an empty file
-- content. The 'Handle' must be open for reading and be seekable.
--
-- The 'Handle' position is advanced to the beginning of the entry content (if
-- any). You must check the 'entryContent' to see if the entry is of type
-- 'NormalFile'.
-- If it is, the 'NormalFile' gives the content length and you
-- are free to read this much data from the 'Handle'.
--
-- > entry <- Tar.hReadEntryHeader hnd
-- > case Tar.entryContent entry of
-- >   Tar.NormalFile _ size -> do content <- BS.hGet hnd size
-- >                               ...
--
-- Of course you don't have to read it all in one go (as 'hReadEntry' does),
-- you can use any appropriate method to read it incrementally.
--
-- In addition to I\/O errors, this can throw a 'FormatError' if the offset is
-- wrong, or if the file is not valid tar format.
--
-- There is also the lower level operation 'hSeekEntryOffset'.
--
hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry
hReadEntryHeader hnd blockOff = do
    hSeekEntryOffset hnd blockOff
    headerBlock <- LBS.hGet hnd 512
    case Tar.read headerBlock of
      Tar.Next entry _ -> return entry
      Tar.Fail e       -> throwIO e
      -- presumably unreachable for a full header block — 'Tar.read' is
      -- expected to produce an entry or an error here
      Tar.Done         -> fail "hReadEntryHeader: impossible"

-- | Set the 'Handle' position to the position corresponding to the given
-- 'TarEntryOffset'.
--
-- This position is where the entry metadata can be read. If you already know
-- the entry has a body (and perhaps know it's length), you may wish to seek to
-- the body content directly using 'hSeekEntryContentOffset'.
--
hSeekEntryOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryOffset hnd blockOff =
    -- offsets count 512-byte blocks, hSeek wants bytes
    hSeek hnd AbsoluteSeek (fromIntegral blockOff * 512)

-- | Set the 'Handle' position to the entry content position corresponding to
-- the given 'TarEntryOffset'.
--
-- This position is where the entry content can be read using ordinary I\/O
-- operations (though you have to know in advance how big the entry content
-- is). This is /only valid/ if you /already know/ the entry has a body (i.e.
-- is a normal file).
--
hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryContentOffset hnd blockOff =
    -- the content starts one 512-byte block after the header
    hSeekEntryOffset hnd (blockOff + 1)

-- | This is a low level variant on 'hReadEntryHeader', that can be used to
-- iterate through a tar file, entry by entry.
--
-- It has a few differences compared to 'hReadEntryHeader':
--
-- * It returns an indication when the end of the tar file is reached.
--
-- * It /does not/ move the 'Handle' position to the beginning of the entry
--   content.
--
-- * It returns the 'TarEntryOffset' of the next entry.
--
-- After this action, the 'Handle' position is not in any useful place. If
-- you want to skip to the next entry, take the 'TarEntryOffset' returned and
-- use 'hReadEntryHeaderOrEof' again. Or if having inspected the 'Entry'
-- header you want to read the entry content (if it has one) then use
-- 'hSeekEntryContentOffset' on the original input 'TarEntryOffset'.
--
hReadEntryHeaderOrEof :: Handle -> TarEntryOffset
                      -> IO (Maybe (Entry, TarEntryOffset))
hReadEntryHeaderOrEof hnd blockOff = do
    hSeekEntryOffset hnd blockOff
    -- 1024 bytes rather than 512: presumably so 'Tar.read' can also see the
    -- end-of-archive marker and report 'Done' — confirm against Tar.read.
    chunk <- LBS.hGet hnd 1024
    case Tar.read chunk of
      Tar.Next entry _ -> let !nextOff = nextEntryOffset entry blockOff
                           in return (Just (entry, nextOff))
      Tar.Done         -> return Nothing
      Tar.Fail e       -> throwIO e

-- | Seek to the end of a tar file, to the position where new entries can
-- be appended, and return that 'TarEntryOffset'.
--
-- If you have a valid 'TarIndex' for this tar file then you should supply it
-- because it allows seeking directly to the correct location.
--
-- If you do not have an index, then this becomes an expensive linear
-- operation because we have to read each tar entry header from the beginning
-- to find the location immediately after the last entry (this is because tar
-- files have a variable length trailer and we cannot reliably find that by
-- starting at the end). In this mode, it will fail with an exception if the
-- file is not in fact in the tar format.
--
hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset
hSeekEndEntryOffset hnd (Just index) = do
    -- Fast path: the index records exactly where the last entry ends.
    let offset = indexEndEntryOffset index
    hSeekEntryOffset hnd offset
    return offset

hSeekEndEntryOffset hnd Nothing = do
    size <- hFileSize hnd
    if size == 0
      then return 0          -- empty file: append position is the start
      else scanForward 0
  where
    -- Walk the entry headers one by one until the archive trailer.
    scanForward offset = do
      next <- hReadEntryHeaderOrEof hnd offset
      case next of
        Just (_, offset') -> scanForward offset'
        Nothing           -> do hSeekEntryOffset hnd offset
                                return offset

-------------------------
-- (de)serialisation
--

-- | The 'TarIndex' is compact in memory, and it has a similarly compact
-- external representation.
--
serialise :: TarIndex -> BS.ByteString
serialise = toStrict . serialiseLBS

-- we keep this version around just so we can check we got the size right.
serialiseLBS :: TarIndex -> LBS.ByteString
serialiseLBS index =
    BS.toLazyByteStringWith
      (BS.untrimmedStrategy (serialiseSize index) 512) LBS.empty
      (serialiseBuilder index)

-- | Exact number of bytes 'serialiseBuilder' emits: an 8-byte header
-- (version word + final offset word) plus the two sub-structures.
serialiseSize :: TarIndex -> Int
serialiseSize (TarIndex stringTable intTrie _) =
    StringTable.serialiseSize stringTable
  + IntTrie.serialiseSize intTrie
  + 8

serialiseBuilder :: TarIndex -> BS.Builder
serialiseBuilder (TarIndex stringTable intTrie finalOffset) =
     BS.word32BE 2 -- format version
  <> BS.word32BE finalOffset
  <> StringTable.serialise stringTable
  <> IntTrie.serialise intTrie

-- | Read the external representation back into a 'TarIndex'.
--
-- Accepts both the old v1 and the current v2 format. The two differ only in
-- the string-table encoding, so the common framing is factored out below
-- (previously the two branches were duplicated in full).
--
deserialise :: BS.ByteString -> Maybe (TarIndex, BS.ByteString)
deserialise bs
  | BS.length bs < 8 = Nothing
  | otherwise =
      case readWord32BE bs 0 of
        1 -> deserialiseBody StringTable.deserialiseV1
        2 -> deserialiseBody StringTable.deserialiseV2
        _ -> Nothing   -- unknown format version
  where
    -- Shared framing: final offset, then string table, then int trie;
    -- any remaining bytes are returned to the caller.
    deserialiseBody deserialiseStringTable = do
      let !finalOffset = readWord32BE bs 4
      (stringTable, bs')  <- deserialiseStringTable (BS.drop 8 bs)
      (intTrie,     bs'') <- IntTrie.deserialise bs'
      return (TarIndex stringTable intTrie finalOffset, bs'')

-- | Big-endian 'Word32' at byte offset @i@. Bounds are checked only via
-- 'assert' (disabled in production builds), hence the unsafe indexing.
readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE bs i =
    assert (i >= 0 && i+3 <= BS.length bs - 1) $
      fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
    + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
    + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
    + fromIntegral (BS.unsafeIndex bs (i + 3))


-------------------------
-- Test properties
--

#ifdef TESTS

-- Not quite the properties of
-- a finite mapping because we also have lookups
-- that result in completions.

-- A file lookup must agree with the association list the index was built
-- from; a directory lookup must list exactly the next path components of
-- every stored path extending the query.
prop_lookup :: ValidPaths -> NonEmptyFilePath -> Bool
prop_lookup (ValidPaths paths) (NonEmptyFilePath p) =
  case (lookup index p, Prelude.lookup p paths) of
    (Nothing,                    Nothing)          -> True
    (Just (TarFileEntry offset), Just (_,offset')) -> offset == offset'
    (Just (TarDir entries),      Nothing)          -> sort (nub (map fst entries))
                                                  == sort (nub completions)
    _                                              -> False
  where
    index       = construct paths
    completions = [ head (FilePath.splitDirectories completion)
                  | (path,_) <- paths
                  , completion <- maybeToList $ stripPrefix (p ++ "/") path ]

-- The index enumerates exactly the paths and offsets it was built from.
prop_toList :: ValidPaths -> Bool
prop_toList (ValidPaths paths) =
    sort (toList index)
 == sort [ (path, off) | (path, (_sz, off)) <- paths ]
  where
    index = construct paths

-- Validates the index's sub-structures as well as its own lookups; uses
-- 'error' rather than False so failures identify which part broke.
prop_valid :: ValidPaths -> Bool
prop_valid (ValidPaths paths)
  | not $ StringTable.prop_valid   pathbits = error "TarIndex: bad string table"
  | not $ IntTrie.prop_lookup      intpaths = error "TarIndex: bad int trie"
  | not $ IntTrie.prop_completions intpaths = error "TarIndex: bad int trie"
  | not $ prop'                             = error "TarIndex: bad prop"
  | otherwise                               = True

  where
    index@(TarIndex pathTable _ _) = construct paths

    pathbits = concatMap (map BS.Char8.pack . FilePath.splitDirectories . fst)
                         paths
    intpaths = [ (cids, offset)
               | (path, (_size, offset)) <- paths
               , let Just cids = toComponentIds pathTable path ]
    prop' = flip all paths $ \(file, (_size, offset)) ->
      case lookup index file of
        Just (TarFileEntry offset') -> offset' == offset
        _                           -> False

-- Round-trip: deserialising a serialised index yields the same index with
-- no leftover bytes.
prop_serialise_deserialise :: ValidPaths -> Bool
prop_serialise_deserialise (ValidPaths paths) =
    Just (index, BS.empty) == (deserialise . serialise) index
  where
    index = construct paths
serialise) index 653 | where 654 | index = construct paths 655 | 656 | prop_serialiseSize :: ValidPaths -> Bool 657 | prop_serialiseSize (ValidPaths paths) = 658 | case (LBS.toChunks . serialiseLBS) index of 659 | [c1] -> BS.length c1 == serialiseSize index 660 | _ -> False 661 | where 662 | index = construct paths 663 | 664 | newtype NonEmptyFilePath = NonEmptyFilePath FilePath deriving Show 665 | 666 | instance Arbitrary NonEmptyFilePath where 667 | arbitrary = NonEmptyFilePath . FilePath.joinPath 668 | <$> listOf1 (elements ["a", "b", "c", "d"]) 669 | 670 | newtype ValidPaths = ValidPaths [(FilePath, (Int64, TarEntryOffset))] deriving Show 671 | 672 | instance Arbitrary ValidPaths where 673 | arbitrary = do 674 | paths <- makeNoPrefix <$> listOf arbitraryPath 675 | sizes <- vectorOf (length paths) (getNonNegative <$> arbitrary) 676 | let offsets = scanl (\o sz -> o + 1 + blocks sz) 0 sizes 677 | return (ValidPaths (zip paths (zip sizes offsets))) 678 | where 679 | arbitraryPath = FilePath.joinPath 680 | <$> listOf1 (elements ["a", "b", "c", "d"]) 681 | makeNoPrefix [] = [] 682 | makeNoPrefix (k:ks) 683 | | all (not . isPrefixOfOther k) ks 684 | = k : makeNoPrefix ks 685 | | otherwise = makeNoPrefix ks 686 | 687 | isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a 688 | 689 | blocks :: Int64 -> TarEntryOffset 690 | blocks size = fromIntegral (1 + ((size - 1) `div` 512)) 691 | 692 | -- Helper for bulk construction. 693 | construct :: [(FilePath, (Int64, TarEntryOffset))] -> TarIndex 694 | construct = 695 | either (\_ -> undefined) id 696 | . build 697 | . 
foldr (\(path, (size, _off)) es -> Next (testEntry path size) es) Done 698 | 699 | example0 :: Entries () 700 | example0 = 701 | testEntry "foo-1.0/foo-1.0.cabal" 1500 -- at block 0 702 | `Next` testEntry "foo-1.0/LICENSE" 2000 -- at block 4 703 | `Next` testEntry "foo-1.0/Data/Foo.hs" 1000 -- at block 9 704 | `Next` Done 705 | 706 | example1 :: Entries () 707 | example1 = 708 | Next (testEntry "./" 1500) Done <> example0 709 | 710 | testEntry :: FilePath -> Int64 -> Entry 711 | testEntry name size = simpleEntry path (NormalFile mempty size) 712 | where 713 | Right path = toTarPath False name 714 | 715 | -- | Simple tar archive containing regular files only 716 | data SimpleTarArchive = SimpleTarArchive { 717 | simpleTarEntries :: Tar.Entries () 718 | , simpleTarRaw :: [(FilePath, LBS.ByteString)] 719 | , simpleTarBS :: LBS.ByteString 720 | } 721 | 722 | instance Show SimpleTarArchive where 723 | show = show . simpleTarRaw 724 | 725 | prop_index_matches_tar :: SimpleTarArchive -> Property 726 | prop_index_matches_tar sta = 727 | ioProperty (try go >>= either (\e -> throwIO (e :: SomeException)) 728 | (\_ -> return True)) 729 | where 730 | go :: IO () 731 | go = do 732 | h <- HBS.readHandle True (simpleTarBS sta) 733 | goEntries h 0 (simpleTarEntries sta) 734 | 735 | goEntries :: Handle -> TarEntryOffset -> Tar.Entries () -> IO () 736 | goEntries _ _ Tar.Done = 737 | return () 738 | goEntries _ _ (Tar.Fail _) = 739 | throwIO (userError "Fail entry in SimpleTarArchive") 740 | goEntries h offset (Tar.Next e es) = do 741 | goEntry h offset e 742 | goEntries h (nextEntryOffset e offset) es 743 | 744 | goEntry :: Handle -> TarEntryOffset -> Tar.Entry -> IO () 745 | goEntry h offset e = do 746 | e' <- hReadEntry h offset 747 | case (Tar.entryContent e, Tar.entryContent e') of 748 | (Tar.NormalFile bs sz, Tar.NormalFile bs' sz') -> 749 | unless (sz == sz' && bs == bs') $ 750 | throwIO $ userError "Entry mismatch" 751 | _otherwise -> 752 | throwIO $ userError "unexpected 
entry types" 753 | 754 | instance Arbitrary SimpleTarArchive where 755 | arbitrary = do 756 | numEntries <- sized $ \n -> choose (0, n) 757 | rawEntries <- mkRaw numEntries 758 | let entries = mkList rawEntries 759 | return SimpleTarArchive { 760 | simpleTarEntries = mkEntries entries 761 | , simpleTarRaw = rawEntries 762 | , simpleTarBS = Tar.write entries 763 | } 764 | where 765 | mkRaw :: Int -> Gen [(FilePath, LBS.ByteString)] 766 | mkRaw 0 = return [] 767 | mkRaw n = do 768 | -- Pick a size around 0, 1, or 2 block boundaries 769 | sz <- sized $ \n -> elements (take n fileSizes) 770 | bs <- LBS.pack `fmap` vectorOf sz arbitrary 771 | es <- mkRaw (n - 1) 772 | return $ ("file" ++ show n, bs) : es 773 | 774 | mkList :: [(FilePath, LBS.ByteString)] -> [Tar.Entry] 775 | mkList [] = [] 776 | mkList ((fp, bs):es) = entry : mkList es 777 | where 778 | Right path = toTarPath False fp 779 | entry = simpleEntry path content 780 | content = NormalFile bs (LBS.length bs) 781 | 782 | mkEntries :: [Tar.Entry] -> Tar.Entries () 783 | mkEntries [] = Tar.Done 784 | mkEntries (e:es) = Tar.Next e (mkEntries es) 785 | 786 | -- Sizes around 0, 1, and 2 block boundaries 787 | fileSizes :: [Int] 788 | fileSizes = [ 789 | 0 , 1 , 2 790 | , 510 , 511 , 512 , 513 , 514 791 | , 1022 , 1023 , 1024 , 1025 , 1026 792 | ] 793 | 794 | -- | 'IndexBuilder' constructed from a 'SimpleIndex' 795 | newtype SimpleIndexBuilder = SimpleIndexBuilder IndexBuilder 796 | deriving Show 797 | 798 | instance Arbitrary SimpleIndexBuilder where 799 | arbitrary = SimpleIndexBuilder . build' . 
simpleTarEntries <$> arbitrary 800 | where 801 | -- like 'build', but don't finalize 802 | build' :: Show e => Entries e -> IndexBuilder 803 | build' = go empty 804 | where 805 | go !builder (Next e es) = go (addNextEntry e builder) es 806 | go !builder Done = builder 807 | go !_ (Fail err) = error (show err) 808 | 809 | prop_finalise_unfinalise :: SimpleIndexBuilder -> Bool 810 | prop_finalise_unfinalise (SimpleIndexBuilder index) = 811 | unfinalise (finalise index) == index 812 | 813 | #endif 814 | 815 | toStrict :: LBS.ByteString -> BS.ByteString 816 | #if MIN_VERSION_bytestring(0,10,0) 817 | toStrict = LBS.toStrict 818 | #else 819 | toStrict = BS.concat . LBS.toChunks 820 | #endif 821 | 822 | #if !(MIN_VERSION_base(4,5,0)) 823 | (<>) :: Monoid m => m -> m -> m 824 | (<>) = mappend 825 | #endif 826 | --------------------------------------------------------------------------------