├── bytestring-0.9.cabal.config ├── htar ├── Setup.hs ├── LICENSE ├── htar.cabal └── htar.hs ├── Setup.lhs ├── README.md ├── bench └── Main.hs ├── LICENSE ├── test └── Properties.hs ├── changelog.md ├── Codec └── Archive │ ├── Tar │ ├── Entry.hs │ ├── Unpack.hs │ ├── Write.hs │ ├── Pack.hs │ ├── Check.hs │ ├── Read.hs │ ├── Index │ │ ├── StringTable.hs │ │ └── IntTrie.hs │ ├── Types.hs │ └── Index.hs │ └── Tar.hs ├── tar.cabal └── .travis.yml /bytestring-0.9.cabal.config: -------------------------------------------------------------------------------- 1 | constraints: bytestring==0.9.* 2 | -------------------------------------------------------------------------------- /htar/Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /htar/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CIFASIS/tar/master/htar/LICENSE -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | > import Distribution.Simple 2 | 3 | > main :: IO () 4 | > main = defaultMain 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | The `tar` Package [![Hackage](https://img.shields.io/hackage/v/tar.svg)](https://hackage.haskell.org/package/tar) [![Build Status](https://travis-ci.org/haskell/tar.svg)](https://travis-ci.org/haskell/tar) 2 | ================= 3 | 4 | See [`tar` on Hackage](https://hackage.haskell.org/package/tar) for more information. 
5 | -------------------------------------------------------------------------------- /bench/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import qualified Codec.Archive.Tar as Tar 4 | import qualified Codec.Archive.Tar.Index as TarIndex 5 | 6 | import qualified Data.ByteString.Lazy as BS 7 | import Control.Exception 8 | 9 | import Criterion 10 | import Criterion.Main 11 | 12 | main = defaultMain benchmarks 13 | 14 | benchmarks :: [Benchmark] 15 | benchmarks = 16 | [ env loadTarFile $ \tarfile -> 17 | bench "read" (nf Tar.read tarfile) 18 | 19 | , env loadTarEntriesList $ \entries -> 20 | bench "write" (nf Tar.write entries) 21 | 22 | , env loadTarEntries $ \entries -> 23 | bench "index build" (nf TarIndex.build entries) 24 | 25 | , env loadTarIndex $ \entries -> 26 | bench "index rebuild" (nf (TarIndex.finalise . TarIndex.unfinalise) entries) 27 | ] 28 | 29 | loadTarFile :: IO BS.ByteString 30 | loadTarFile = 31 | BS.readFile "01-index.tar" 32 | 33 | loadTarEntries :: IO (Tar.Entries Tar.FormatError) 34 | loadTarEntries = 35 | fmap Tar.read loadTarFile 36 | 37 | loadTarEntriesList :: IO [Tar.Entry] 38 | loadTarEntriesList = 39 | fmap (Tar.foldEntries (:) [] throw) loadTarEntries 40 | 41 | loadTarIndex :: IO TarIndex.TarIndex 42 | loadTarIndex = 43 | fmap (either throw id . TarIndex.build) 44 | loadTarEntries 45 | 46 | -------------------------------------------------------------------------------- /htar/htar.cabal: -------------------------------------------------------------------------------- 1 | name: htar 2 | version: 0.4.0.2 3 | license: BSD3 4 | license-File: LICENSE 5 | author: Bjorn Bringert 6 | Duncan Coutts 7 | maintainer: Duncan Coutts 8 | copyright: 2007 Bjorn Bringert 9 | 2008-2015 Duncan Coutts 10 | category: Codec 11 | synopsis: Command-line tar archive utility. 12 | description: A Command-line utility to create, extract and list the 13 | contents of tar archives. 
It can work with compressed 14 | archives using gzip or bzip2 compression. 15 | . 16 | This is in part a demo of the @tar@ library but it is also 17 | usable in place of the ordinary @tar@ program for systems 18 | like Windows that do not come with it as standard. 19 | build-type: Simple 20 | cabal-version: >= 1.6 21 | 22 | flag old-locale 23 | 24 | executable htar 25 | main-is: htar.hs 26 | ghc-options: -Wall 27 | build-depends: 28 | base == 4.*, 29 | time >= 1.1, 30 | directory >= 1.0, 31 | filepath >= 1.0, 32 | bytestring >= 0.9, 33 | tar == 0.4.* && >= 0.4.2, 34 | zlib >= 0.4 && < 0.7, 35 | bzlib >= 0.4 && < 0.7 36 | 37 | if flag(old-locale) 38 | build-depends: time < 1.5, old-locale >= 1.0 39 | else 40 | build-depends: time >= 1.5 41 | 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2007 Björn Bringert, 2 | 2008-2015 Duncan Coutts, 3 | 2011 Max Bolingbroke 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | - Redistributions of source code must retain the above copyright notice, 10 | this list of conditions and the following disclaimer. 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | - Neither the names of the copyright owners nor the names of the 15 | contributors may be used to endorse or promote products derived 16 | from this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /test/Properties.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import qualified Codec.Archive.Tar.Index as Index 4 | import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie 5 | import qualified Codec.Archive.Tar.Index.StringTable as StringTable 6 | import qualified Codec.Archive.Tar as Tar 7 | 8 | import qualified Data.ByteString as BS 9 | 10 | import Test.Tasty 11 | import Test.Tasty.QuickCheck 12 | 13 | main :: IO () 14 | main = 15 | defaultMain $ 16 | testGroup "tar tests" [ 17 | 18 | testGroup "write/read" [ 19 | testProperty "ustar format" Tar.prop_write_read_ustar, 20 | testProperty "gnu format" Tar.prop_write_read_gnu, 21 | testProperty "v7 format" Tar.prop_write_read_v7 22 | ] 23 | 24 | , testGroup "string table" [ 25 | testProperty "construction" StringTable.prop_valid, 26 | testProperty "sorted" StringTable.prop_sorted, 27 | testProperty "serialise" StringTable.prop_serialise_deserialise, 28 | testProperty "size" StringTable.prop_serialiseSize, 29 | testProperty "unfinalise" 
StringTable.prop_finalise_unfinalise 30 | ] 31 | 32 | , testGroup "int trie" [ 33 | testProperty "unit 1" IntTrie.test1, 34 | testProperty "unit 2" IntTrie.test2, 35 | testProperty "unit 3" IntTrie.test3, 36 | testProperty "lookups" IntTrie.prop_lookup_mono, 37 | testProperty "completions" IntTrie.prop_completions_mono, 38 | testProperty "toList" IntTrie.prop_construct_toList, 39 | testProperty "serialise" IntTrie.prop_serialise_deserialise, 40 | testProperty "size" IntTrie.prop_serialiseSize, 41 | testProperty "unfinalise" IntTrie.prop_finalise_unfinalise 42 | ] 43 | 44 | , testGroup "index" [ 45 | testProperty "lookup" Index.prop_lookup, 46 | testProperty "valid" Index.prop_valid, 47 | testProperty "toList" Index.prop_toList, 48 | testProperty "serialise" Index.prop_serialise_deserialise, 49 | testProperty "size" Index.prop_serialiseSize, 50 | testProperty "matches tar" Index.prop_index_matches_tar, 51 | testProperty "unfinalise" Index.prop_finalise_unfinalise 52 | ] 53 | ] 54 | 55 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | 0.5.0.1 Duncan Coutts January 2016 2 | 3 | * Fix compatability with directory-1.2.3+ 4 | 5 | 0.5.0.0 Duncan Coutts January 2016 6 | 7 | * Work with old version of bytestring (using bytestring-builder package). 8 | * Builds with GHC 6.10 -- 8.0. 9 | * Change type of Index.serialise to be simply strict bytestring. 10 | * Preserve file timestamps on unpack (with directory-1.2.3+) 11 | 12 | 0.4.5.0 Duncan Coutts January 2016 13 | 14 | * Revert accidental minor API change in 0.4.x series (the type of the 15 | owner and group name strings). The 0.4.3.0 and 0.4.4.0 releases 16 | contained the accidental API change. 
17 | * Add a handy foldlEntries function 18 | 19 | 0.4.4.0 Duncan Coutts January 2016 20 | 21 | * Build and warning fixes for GHC 7.10 and 8.0 22 | * New Index module function `toList` to get all index entries 23 | 24 | 0.4.3.0 Duncan Coutts January 2016 25 | 26 | * New Index function `unfinalise` to extend existing index 27 | * 9x faster reading 28 | * 9x faster index construction 29 | * 24x faster index extension 30 | * More compact entry types, using ByteStrings 31 | * More Eq and Show instances 32 | * Greater QC test coverage 33 | * Fix minor bug in reading non-standard v7 format entries 34 | 35 | 0.4.2.2 Edsko de Vries October 2015 36 | 37 | * Fix bug in Index 38 | 39 | 0.4.2.1 Duncan Coutts July 2015 40 | 41 | * Fix tests for the Index modules (the code was right) 42 | 43 | 0.4.2.0 Duncan Coutts July 2015 44 | 45 | * New Index module for random access to tar file contents 46 | * New lower level tar file I/O actions 47 | * New tarball file 'append' action 48 | 49 | 0.4.1.0 Duncan Coutts January 2015 50 | 51 | * Build with GHC 7.10 52 | * Switch from old-time to time package 53 | * Added more instance for Entries type 54 | 55 | 0.4.0.1 Duncan Coutts October 2012 56 | 57 | * fixes to work with directory 1.2 58 | * More Eq/Ord instances 59 | 60 | 0.4.0.0 Duncan Coutts February 2012 61 | 62 | * More explicit error types and error handling 63 | * Support star base-256 number format 64 | * Improved API documentation 65 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Entry.hs: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------------------------- 2 | -- | 3 | -- Module : Codec.Archive.Tar.Entry 4 | -- Copyright : (c) 2007 Bjorn Bringert, 5 | -- 2008 Andrea Vezzosi, 6 | -- 2008-2009 Duncan Coutts 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : duncan@community.haskell.org 10 | -- Portability : portable 11 | -- 12 | -- Types 
and functions to manipulate tar entries. 13 | -- 14 | -- While the "Codec.Archive.Tar" module provides only the simple high level 15 | -- API, this module provides full access to the details of tar entries. This 16 | -- lets you inspect all the meta-data, construct entries and handle error cases 17 | -- more precisely. 18 | -- 19 | -- This module uses common names and so is designed to be imported qualified: 20 | -- 21 | -- > import qualified Codec.Archive.Tar as Tar 22 | -- > import qualified Codec.Archive.Tar.Entry as Tar 23 | -- 24 | ----------------------------------------------------------------------------- 25 | module Codec.Archive.Tar.Entry ( 26 | 27 | -- * Tar entry and associated types 28 | Entry(..), 29 | --TODO: should be the following with the Entry constructor not exported, 30 | -- but haddock cannot document that properly 31 | -- see http://trac.haskell.org/haddock/ticket/3 32 | --Entry(filePath, fileMode, ownerId, groupId, fileSize, modTime, 33 | -- fileType, linkTarget, headerExt, fileContent), 34 | entryPath, 35 | EntryContent(..), 36 | Ownership(..), 37 | 38 | FileSize, 39 | Permissions, 40 | EpochTime, 41 | DevMajor, 42 | DevMinor, 43 | TypeCode, 44 | Format(..), 45 | 46 | -- * Constructing simple entry values 47 | simpleEntry, 48 | fileEntry, 49 | directoryEntry, 50 | 51 | -- * Standard file permissions 52 | -- | For maximum portability when constructing archives use only these file 53 | -- permissions. 
54 | ordinaryFilePermissions, 55 | executableFilePermissions, 56 | directoryPermissions, 57 | 58 | -- * Constructing entries from disk files 59 | packFileEntry, 60 | packDirectoryEntry, 61 | getDirectoryContentsRecursive, 62 | 63 | -- * TarPath type 64 | TarPath, 65 | toTarPath, 66 | fromTarPath, 67 | fromTarPathToPosixPath, 68 | fromTarPathToWindowsPath, 69 | 70 | -- * LinkTarget type 71 | LinkTarget, 72 | toLinkTarget, 73 | fromLinkTarget, 74 | fromLinkTargetToPosixPath, 75 | fromLinkTargetToWindowsPath, 76 | 77 | ) where 78 | 79 | import Codec.Archive.Tar.Types 80 | import Codec.Archive.Tar.Pack 81 | -------------------------------------------------------------------------------- /tar.cabal: -------------------------------------------------------------------------------- 1 | name: tar 2 | version: 0.5.0.1 3 | license: BSD3 4 | license-file: LICENSE 5 | author: Duncan Coutts 6 | Bjorn Bringert 7 | maintainer: Duncan Coutts 8 | bug-reports: https://github.com/haskell/tar/issues 9 | copyright: 2007 Bjorn Bringert 10 | 2008-2016 Duncan Coutts 11 | category: Codec 12 | synopsis: Reading, writing and manipulating ".tar" archive files. 13 | description: This library is for working with \"@.tar@\" archive files. It 14 | can read and write a range of common variations of archive 15 | format including V7, POSIX USTAR and GNU formats. 16 | . 17 | It provides support for packing and unpacking portable 18 | archives. This makes it suitable for distribution but not 19 | backup because details like file ownership and exact 20 | permissions are not preserved. 21 | . 22 | It also provides features for random access to archive 23 | content using an index. 
24 | build-type: Simple 25 | cabal-version: >=1.8 26 | extra-source-files: changelog.md 27 | tested-with: GHC==6.10.4, GHC==6.12.3, GHC==7.0.4, GHC==7.2.2, GHC==7.4.2, 28 | GHC==7.6.3, GHC==7.8.4, GHC==7.10.2, GHC==8.1 29 | 30 | source-repository head 31 | type: git 32 | location: https://github.com/haskell/tar.git 33 | 34 | flag old-time 35 | default: False 36 | 37 | flag old-bytestring 38 | default: False 39 | 40 | library 41 | build-depends: base == 4.*, 42 | filepath, 43 | directory, 44 | array, 45 | containers >= 0.2, 46 | deepseq >= 1.1 && < 1.5 47 | if flag(old-time) 48 | build-depends: directory < 1.2, old-time 49 | else 50 | build-depends: directory >= 1.2, time 51 | 52 | if flag(old-bytestring) 53 | build-depends: bytestring-builder, bytestring >= 0.9 && <0.10 54 | else 55 | build-depends: bytestring >= 0.10 56 | 57 | exposed-modules: 58 | Codec.Archive.Tar 59 | Codec.Archive.Tar.Entry 60 | Codec.Archive.Tar.Check 61 | Codec.Archive.Tar.Index 62 | Codec.Archive.Tar.Types 63 | Codec.Archive.Tar.Read 64 | Codec.Archive.Tar.Write 65 | Codec.Archive.Tar.Pack 66 | Codec.Archive.Tar.Unpack 67 | Codec.Archive.Tar.Index.StringTable 68 | Codec.Archive.Tar.Index.IntTrie 69 | 70 | other-extensions: 71 | CPP, BangPatterns, 72 | DeriveDataTypeable, ScopedTypeVariables 73 | 74 | ghc-options: -Wall -fno-warn-unused-imports 75 | 76 | test-suite properties 77 | type: exitcode-stdio-1.0 78 | build-depends: base, 79 | filepath, 80 | array, 81 | containers, 82 | deepseq, 83 | bytestring-handle, 84 | QuickCheck == 2.*, 85 | tasty >= 0.10 && <0.12, 86 | tasty-quickcheck == 0.8.* 87 | 88 | if flag(old-time) 89 | build-depends: directory < 1.2, old-time 90 | else 91 | build-depends: directory >= 1.2, time 92 | 93 | if flag(old-bytestring) 94 | build-depends: bytestring-builder, bytestring >= 0.9 && <0.10 95 | else 96 | build-depends: bytestring >= 0.10 97 | 98 | hs-source-dirs: . 
test 99 | 100 | main-is: test/Properties.hs 101 | cpp-options: -DTESTS 102 | 103 | other-modules: 104 | Codec.Archive.Tar.Index 105 | Codec.Archive.Tar.Index.StringTable 106 | Codec.Archive.Tar.Index.IntTrie 107 | 108 | other-extensions: 109 | CPP, BangPatterns, 110 | DeriveDataTypeable, ScopedTypeVariables 111 | 112 | ghc-options: -fno-ignore-asserts 113 | 114 | benchmark bench 115 | type: exitcode-stdio-1.0 116 | hs-source-dirs: . bench 117 | main-is: bench/Main.hs 118 | build-depends: base, 119 | bytestring, 120 | filepath, directory, 121 | array, 122 | containers, 123 | deepseq, 124 | old-time, time, 125 | criterion >= 1.0 126 | 127 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Unpack.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar 5 | -- Copyright : (c) 2007 Bjorn Bringert, 6 | -- 2008 Andrea Vezzosi, 7 | -- 2008-2009, 2012, 2016 Duncan Coutts 8 | -- License : BSD3 9 | -- 10 | -- Maintainer : duncan@community.haskell.org 11 | -- Portability : portable 12 | -- 13 | ----------------------------------------------------------------------------- 14 | module Codec.Archive.Tar.Unpack ( 15 | unpack, 16 | ) where 17 | 18 | import Codec.Archive.Tar.Types 19 | import Codec.Archive.Tar.Check 20 | 21 | import qualified Data.ByteString.Lazy as BS 22 | import System.FilePath 23 | ( () ) 24 | import qualified System.FilePath as FilePath.Native 25 | ( takeDirectory ) 26 | import System.Directory 27 | ( createDirectoryIfMissing, copyFile ) 28 | import Control.Exception 29 | ( Exception, throwIO ) 30 | #if MIN_VERSION_directory(1,2,3) 31 | import System.Directory 32 | ( setModificationTime ) 33 | import Data.Time.Clock.POSIX 34 | ( posixSecondsToUTCTime ) 35 | import Control.Exception as Exception 36 | ( catch ) 37 | import System.IO.Error 
38 | ( isPermissionError ) 39 | #endif 40 | 41 | 42 | -- | Create local files and directories based on the entries of a tar archive. 43 | -- 44 | -- This is a portable implementation of unpacking suitable for portable 45 | -- archives. It handles 'NormalFile' and 'Directory' entries and has simulated 46 | -- support for 'SymbolicLink' and 'HardLink' entries. Links are implemented by 47 | -- copying the target file. This therefore works on Windows as well as Unix. 48 | -- All other entry types are ignored, that is they are not unpacked and no 49 | -- exception is raised. 50 | -- 51 | -- If the 'Entries' ends in an error then it is raised an an exception. Any 52 | -- files or directories that have been unpacked before the error was 53 | -- encountered will not be deleted. For this reason you may want to unpack 54 | -- into an empty directory so that you can easily clean up if unpacking fails 55 | -- part-way. 56 | -- 57 | -- On its own, this function only checks for security (using 'checkSecurity'). 58 | -- You can do other checks by applying checking functions to the 'Entries' that 59 | -- you pass to this function. For example: 60 | -- 61 | -- > unpack dir (checkTarbomb expectedDir entries) 62 | -- 63 | -- If you care about the priority of the reported errors then you may want to 64 | -- use 'checkSecurity' before 'checkTarbomb' or other checks. 65 | -- 66 | unpack :: Exception e => FilePath -> Entries e -> IO () 67 | unpack baseDir entries = unpackEntries [] (checkSecurity entries) 68 | >>= emulateLinks 69 | 70 | where 71 | -- We're relying here on 'checkSecurity' to make sure we're not scribbling 72 | -- files all over the place. 
73 | 74 | unpackEntries _ (Fail err) = either throwIO throwIO err 75 | unpackEntries links Done = return links 76 | unpackEntries links (Next entry es) = case entryContent entry of 77 | NormalFile file _ -> extractFile path file mtime 78 | >> unpackEntries links es 79 | Directory -> extractDir path mtime 80 | >> unpackEntries links es 81 | HardLink link -> (unpackEntries $! saveLink path link links) es 82 | SymbolicLink link -> (unpackEntries $! saveLink path link links) es 83 | _ -> unpackEntries links es --ignore other file types 84 | where 85 | path = entryPath entry 86 | mtime = entryTime entry 87 | 88 | extractFile path content mtime = do 89 | -- Note that tar archives do not make sure each directory is created 90 | -- before files they contain, indeed we may have to create several 91 | -- levels of directory. 92 | createDirectoryIfMissing True absDir 93 | BS.writeFile absPath content 94 | setModTime absPath mtime 95 | where 96 | absDir = baseDir FilePath.Native.takeDirectory path 97 | absPath = baseDir path 98 | 99 | extractDir path mtime = do 100 | createDirectoryIfMissing True absPath 101 | setModTime absPath mtime 102 | where 103 | absPath = baseDir path 104 | 105 | saveLink path link links = seq (length path) 106 | $ seq (length link') 107 | $ (path, link'):links 108 | where link' = fromLinkTarget link 109 | 110 | emulateLinks = mapM_ $ \(relPath, relLinkTarget) -> 111 | let absPath = baseDir relPath 112 | absTarget = FilePath.Native.takeDirectory absPath relLinkTarget 113 | in copyFile absTarget absPath 114 | 115 | setModTime :: FilePath -> EpochTime -> IO () 116 | #if MIN_VERSION_directory(1,2,3) 117 | -- functionality only supported as of directory-1.2.3.x 118 | setModTime path t = 119 | setModificationTime path (posixSecondsToUTCTime (fromIntegral t)) 120 | `Exception.catch` \e -> 121 | if isPermissionError e then return () else throwIO e 122 | #else 123 | setModTime _path _t = return () 124 | #endif 125 | 
-------------------------------------------------------------------------------- /Codec/Archive/Tar/Write.hs: -------------------------------------------------------------------------------- 1 | ----------------------------------------------------------------------------- 2 | -- | 3 | -- Module : Codec.Archive.Tar.Write 4 | -- Copyright : (c) 2007 Bjorn Bringert, 5 | -- 2008 Andrea Vezzosi, 6 | -- 2008-2009 Duncan Coutts 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : duncan@community.haskell.org 10 | -- Portability : portable 11 | -- 12 | ----------------------------------------------------------------------------- 13 | module Codec.Archive.Tar.Write (write) where 14 | 15 | import Codec.Archive.Tar.Types 16 | 17 | import Data.Char (ord) 18 | import Data.List (foldl') 19 | import Data.Monoid (mempty) 20 | import Numeric (showOct) 21 | 22 | import qualified Data.ByteString as BS 23 | import qualified Data.ByteString.Char8 as BS.Char8 24 | import qualified Data.ByteString.Lazy as LBS 25 | import qualified Data.ByteString.Lazy.Char8 as LBS.Char8 26 | 27 | 28 | -- | Create the external representation of a tar archive by serialising a list 29 | -- of tar entries. 30 | -- 31 | -- * The conversion is done lazily. 
32 | -- 33 | write :: [Entry] -> LBS.ByteString 34 | write es = LBS.concat $ map putEntry es ++ [LBS.replicate (512*2) 0] 35 | 36 | putEntry :: Entry -> LBS.ByteString 37 | putEntry entry = case entryContent entry of 38 | NormalFile content size -> LBS.concat [ header, content, padding size ] 39 | OtherEntryType _ content size -> LBS.concat [ header, content, padding size ] 40 | _ -> header 41 | where 42 | header = putHeader entry 43 | padding size = LBS.replicate paddingSize 0 44 | where paddingSize = fromIntegral (negate size `mod` 512) 45 | 46 | putHeader :: Entry -> LBS.ByteString 47 | putHeader entry = 48 | LBS.Char8.pack 49 | $ take 148 block 50 | ++ putOct 7 checksum 51 | ++ ' ' : drop 156 block 52 | -- ++ putOct 8 checksum 53 | -- ++ drop 156 block 54 | where 55 | block = putHeaderNoChkSum entry 56 | checksum = foldl' (\x y -> x + ord y) 0 block 57 | 58 | putHeaderNoChkSum :: Entry -> String 59 | putHeaderNoChkSum Entry { 60 | entryTarPath = TarPath name prefix, 61 | entryContent = content, 62 | entryPermissions = permissions, 63 | entryOwnership = ownership, 64 | entryTime = modTime, 65 | entryFormat = format 66 | } = 67 | 68 | concat 69 | [ putBString 100 $ name 70 | , putOct 8 $ permissions 71 | , putOct 8 $ ownerId ownership 72 | , putOct 8 $ groupId ownership 73 | , putOct 12 $ contentSize 74 | , putOct 12 $ modTime 75 | , fill 8 $ ' ' -- dummy checksum 76 | , putChar8 $ typeCode 77 | , putBString 100 $ linkTarget 78 | ] ++ 79 | case format of 80 | V7Format -> 81 | fill 255 '\NUL' 82 | UstarFormat -> concat 83 | [ putBString 8 $ ustarMagic 84 | , putString 32 $ ownerName ownership 85 | , putString 32 $ groupName ownership 86 | , putOct 8 $ deviceMajor 87 | , putOct 8 $ deviceMinor 88 | , putBString 155 $ prefix 89 | , fill 12 $ '\NUL' 90 | ] 91 | GnuFormat -> concat 92 | [ putBString 8 $ gnuMagic 93 | , putString 32 $ ownerName ownership 94 | , putString 32 $ groupName ownership 95 | , putGnuDev 8 $ deviceMajor 96 | , putGnuDev 8 $ deviceMinor 97 | , 
putBString 155 $ prefix 98 | , fill 12 $ '\NUL' 99 | ] 100 | where 101 | (typeCode, contentSize, linkTarget, 102 | deviceMajor, deviceMinor) = case content of 103 | NormalFile _ size -> ('0' , size, mempty, 0, 0) 104 | Directory -> ('5' , 0, mempty, 0, 0) 105 | SymbolicLink (LinkTarget link) -> ('2' , 0, link, 0, 0) 106 | HardLink (LinkTarget link) -> ('1' , 0, link, 0, 0) 107 | CharacterDevice major minor -> ('3' , 0, mempty, major, minor) 108 | BlockDevice major minor -> ('4' , 0, mempty, major, minor) 109 | NamedPipe -> ('6' , 0, mempty, 0, 0) 110 | OtherEntryType code _ size -> (code, size, mempty, 0, 0) 111 | 112 | putGnuDev w n = case content of 113 | CharacterDevice _ _ -> putOct w n 114 | BlockDevice _ _ -> putOct w n 115 | _ -> replicate w '\NUL' 116 | 117 | ustarMagic, gnuMagic :: BS.ByteString 118 | ustarMagic = BS.Char8.pack "ustar\NUL00" 119 | gnuMagic = BS.Char8.pack "ustar \NUL" 120 | 121 | -- * TAR format primitive output 122 | 123 | type FieldWidth = Int 124 | 125 | putBString :: FieldWidth -> BS.ByteString -> String 126 | putBString n s = BS.Char8.unpack (BS.take n s) ++ fill (n - BS.length s) '\NUL' 127 | 128 | putString :: FieldWidth -> String -> String 129 | putString n s = take n s ++ fill (n - length s) '\NUL' 130 | 131 | --TODO: check integer widths, eg for large file sizes 132 | putOct :: (Integral a, Show a) => FieldWidth -> a -> String 133 | putOct n x = 134 | let octStr = take (n-1) $ showOct x "" 135 | in fill (n - length octStr - 1) '0' 136 | ++ octStr 137 | ++ putChar8 '\NUL' 138 | 139 | putChar8 :: Char -> String 140 | putChar8 c = [c] 141 | 142 | fill :: FieldWidth -> Char -> String 143 | fill n c = replicate n c 144 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This file has been generated -- see https://github.com/hvr/multi-ghc-travis 2 | language: c 3 | sudo: false 4 | 5 | cache: 6 | 
directories: 7 | - $HOME/.cabsnap 8 | - $HOME/.cabal/packages 9 | 10 | before_cache: 11 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/build-reports.log 12 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/00-index.tar 13 | 14 | matrix: 15 | include: 16 | - env: CABALVER=1.16 GHCVER=7.0.4 NOTEST=1 17 | compiler: ": #GHC 7.0.4" 18 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.0.4,zlib1g-dev], sources: [hvr-ghc]}} 19 | - env: CABALVER=1.16 GHCVER=7.0.4 NOTEST=1 CABALCONFIG=bytestring-0.9.cabal.config 20 | compiler: ": #GHC 7.0.4 bytestring-0.9" 21 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.0.4,zlib1g-dev], sources: [hvr-ghc]}} 22 | - env: CABALVER=1.16 GHCVER=7.2.2 NOTEST=1 23 | compiler: ": #GHC 7.2.2" 24 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.2.2,zlib1g-dev], sources: [hvr-ghc]}} 25 | - env: CABALVER=1.16 GHCVER=7.2.2 NOTEST=1 CABALCONFIG=bytestring-0.9.cabal.config 26 | compiler: ": #GHC 7.2.2 bytestring-0.9" 27 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.2.2,zlib1g-dev], sources: [hvr-ghc]}} 28 | - env: CABALVER=1.22 GHCVER=7.4.2 29 | compiler: ": #GHC 7.4.2" 30 | addons: {apt: {packages: [cabal-install-1.22,ghc-7.4.2,zlib1g-dev], sources: [hvr-ghc]}} 31 | - env: CABALVER=1.22 GHCVER=7.4.2 CABALCONFIG=bytestring-0.9.cabal.config 32 | compiler: ": #GHC 7.4.2 bytestring-0.9" 33 | addons: {apt: {packages: [cabal-install-1.22,ghc-7.4.2,zlib1g-dev], sources: [hvr-ghc]}} 34 | - env: CABALVER=1.16 GHCVER=7.6.3 35 | compiler: ": #GHC 7.6.3" 36 | addons: {apt: {packages: [cabal-install-1.16,ghc-7.6.3,zlib1g-dev], sources: [hvr-ghc]}} 37 | - env: CABALVER=1.18 GHCVER=7.8.4 38 | compiler: ": #GHC 7.8.4" 39 | addons: {apt: {packages: [cabal-install-1.18,ghc-7.8.4,zlib1g-dev], sources: [hvr-ghc]}} 40 | - env: CABALVER=1.22 GHCVER=7.10.2 41 | compiler: ": #GHC 7.10.2" 42 | addons: {apt: {packages: [cabal-install-1.22,ghc-7.10.2,zlib1g-dev], sources: [hvr-ghc]}} 43 | - env: CABALVER=head GHCVER=head NOTEST=1 44 | compiler: ": 
#GHC head" 45 | addons: {apt: {packages: [cabal-install-head,ghc-head,zlib1g-dev], sources: [hvr-ghc]}} 46 | - env: CABALVER=head GHCVER=head 47 | compiler: ": #GHC head" 48 | addons: {apt: {packages: [cabal-install-head,ghc-head,zlib1g-dev], sources: [hvr-ghc]}} 49 | 50 | allow_failures: 51 | - env: CABALVER=head GHCVER=head 52 | 53 | before_install: 54 | - unset CC 55 | - export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$PATH 56 | - if [ "x$CABALCONFIG" = "x" ]; then true; else cp $CABALCONFIG cabal.config; fi 57 | - if [ "x$NOTEST" = "x" ]; then export CABFLAGS="--enable-tests"; else export CABFLAGS=""; fi 58 | 59 | install: 60 | - cabal --version 61 | - echo "$(ghc --version) [$(ghc --print-project-git-commit-id 2> /dev/null || echo '?')]" 62 | - if [ -f $HOME/.cabal/packages/hackage.haskell.org/00-index.tar.gz ]; 63 | then 64 | zcat $HOME/.cabal/packages/hackage.haskell.org/00-index.tar.gz > 65 | $HOME/.cabal/packages/hackage.haskell.org/00-index.tar; 66 | fi 67 | - travis_retry cabal update -v 68 | - sed -i 's/^jobs:/-- jobs:/' ${HOME}/.cabal/config 69 | - cabal install --only-dependencies $CABFLAGS --dry -v > installplan.txt 70 | - sed -i -e '1,/^Resolving /d' installplan.txt; cat installplan.txt 71 | 72 | # check whether current requested install-plan matches cached package-db snapshot 73 | - if diff -u installplan.txt $HOME/.cabsnap/installplan.txt; 74 | then 75 | echo "cabal build-cache HIT"; 76 | rm -rfv .ghc; 77 | cp -a $HOME/.cabsnap/ghc $HOME/.ghc; 78 | cp -a $HOME/.cabsnap/lib $HOME/.cabsnap/share $HOME/.cabsnap/bin $HOME/.cabal/; 79 | else 80 | echo "cabal build-cache MISS"; 81 | rm -rf $HOME/.cabsnap; 82 | mkdir -p $HOME/.ghc $HOME/.cabal/lib $HOME/.cabal/share $HOME/.cabal/bin; 83 | cabal install --only-dependencies $CABFLAGS; 84 | fi 85 | 86 | # snapshot package-db on cache miss 87 | - if [ ! 
-d $HOME/.cabsnap ]; 88 | then 89 | echo "snapshotting package-db to build-cache"; 90 | mkdir $HOME/.cabsnap; 91 | cp -a $HOME/.ghc $HOME/.cabsnap/ghc; 92 | cp -a $HOME/.cabal/lib $HOME/.cabal/share $HOME/.cabal/bin installplan.txt $HOME/.cabsnap/; 93 | fi 94 | 95 | # Here starts the actual work to be performed for the package under test; 96 | # any command which exits with a non-zero exit code causes the build to fail. 97 | script: 98 | - if [ -f configure.ac ]; then autoreconf -i; fi 99 | - cabal configure $CABFLAGS -v2 # -v2 provides useful information for debugging 100 | - cabal build # this builds all libraries and executables (including tests/benchmarks) 101 | - if [ "x$NOTEST" = "x" ]; then cabal test; fi 102 | - cabal sdist # tests that a source-distribution can be generated 103 | 104 | # Check that the resulting source distribution can be built & installed. 105 | # If there are no other `.tar.gz` files in `dist`, this can be even simpler: 106 | # `cabal install --force-reinstalls dist/*-*.tar.gz` 107 | - SRC_TGZ=$(cabal info . 
| awk '{print $2;exit}').tar.gz && 108 | (cd dist && cabal install --force-reinstalls "$SRC_TGZ") 109 | 110 | # EOF 111 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Pack.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar 5 | -- Copyright : (c) 2007 Bjorn Bringert, 6 | -- 2008 Andrea Vezzosi, 7 | -- 2008-2009, 2012, 2016 Duncan Coutts 8 | -- License : BSD3 9 | -- 10 | -- Maintainer : duncan@community.haskell.org 11 | -- Portability : portable 12 | -- 13 | ----------------------------------------------------------------------------- 14 | module Codec.Archive.Tar.Pack ( 15 | pack, 16 | packFileEntry, 17 | packDirectoryEntry, 18 | 19 | getDirectoryContentsRecursive, 20 | ) where 21 | 22 | import Codec.Archive.Tar.Types 23 | 24 | import qualified Data.ByteString.Lazy as BS 25 | import System.FilePath 26 | ( () ) 27 | import qualified System.FilePath as FilePath.Native 28 | ( addTrailingPathSeparator, hasTrailingPathSeparator ) 29 | import System.Directory 30 | ( getDirectoryContents, doesDirectoryExist, getModificationTime 31 | , Permissions(..), getPermissions ) 32 | #if MIN_VERSION_directory(1,2,0) 33 | -- The directory package switched to the new time package 34 | import Data.Time.Clock 35 | ( UTCTime ) 36 | import Data.Time.Clock.POSIX 37 | ( utcTimeToPOSIXSeconds ) 38 | #else 39 | import System.Time 40 | ( ClockTime(..) ) 41 | #endif 42 | import System.IO 43 | ( IOMode(ReadMode), openBinaryFile, hFileSize ) 44 | import System.IO.Unsafe (unsafeInterleaveIO) 45 | 46 | -- | Creates a tar archive from a list of directory or files. Any directories 47 | -- specified will have their contents included recursively. Paths in the 48 | -- archive will be relative to the given base directory. 
49 | -- 50 | -- This is a portable implementation of packing suitable for portable archives. 51 | -- In particular it only constructs 'NormalFile' and 'Directory' entries. Hard 52 | -- links and symbolic links are treated like ordinary files. It cannot be used 53 | -- to pack directories containing recursive symbolic links. Special files like 54 | -- FIFOs (named pipes), sockets or device files will also cause problems. 55 | -- 56 | -- An exception will be thrown for any file names that are too long to 57 | -- represent as a 'TarPath'. 58 | -- 59 | -- * This function returns results lazily. Subdirectories are scanned 60 | -- and files are read one by one as the list of entries is consumed. 61 | -- 62 | pack :: FilePath -- ^ Base directory 63 | -> [FilePath] -- ^ Files and directories to pack, relative to the base dir 64 | -> IO [Entry] 65 | pack baseDir paths0 = preparePaths baseDir paths0 >>= packPaths baseDir 66 | 67 | preparePaths :: FilePath -> [FilePath] -> IO [FilePath] 68 | preparePaths baseDir paths = 69 | fmap concat $ interleave 70 | [ do isDir <- doesDirectoryExist (baseDir path) 71 | if isDir 72 | then do entries <- getDirectoryContentsRecursive (baseDir path) 73 | let entries' = map (path ) entries 74 | dir = FilePath.Native.addTrailingPathSeparator path 75 | if null path then return entries' 76 | else return (dir : entries') 77 | else return [path] 78 | | path <- paths ] 79 | 80 | packPaths :: FilePath -> [FilePath] -> IO [Entry] 81 | packPaths baseDir paths = 82 | interleave 83 | [ do tarpath <- either fail return (toTarPath isDir relpath) 84 | if isDir then packDirectoryEntry filepath tarpath 85 | else packFileEntry filepath tarpath 86 | | relpath <- paths 87 | , let isDir = FilePath.Native.hasTrailingPathSeparator filepath 88 | filepath = baseDir relpath ] 89 | 90 | interleave :: [IO a] -> IO [a] 91 | interleave = unsafeInterleaveIO . 
go 92 | where 93 | go [] = return [] 94 | go (x:xs) = do 95 | x' <- x 96 | xs' <- interleave xs 97 | return (x':xs') 98 | 99 | -- | Construct a tar 'Entry' based on a local file. 100 | -- 101 | -- This sets the entry size, the data contained in the file and the file's 102 | -- modification time. If the file is executable then that information is also 103 | -- preserved. File ownership and detailed permissions are not preserved. 104 | -- 105 | -- * The file contents is read lazily. 106 | -- 107 | packFileEntry :: FilePath -- ^ Full path to find the file on the local disk 108 | -> TarPath -- ^ Path to use for the tar Entry in the archive 109 | -> IO Entry 110 | packFileEntry filepath tarpath = do 111 | mtime <- getModTime filepath 112 | perms <- getPermissions filepath 113 | file <- openBinaryFile filepath ReadMode 114 | size <- hFileSize file 115 | content <- BS.hGetContents file 116 | return (simpleEntry tarpath (NormalFile content (fromIntegral size))) { 117 | entryPermissions = if executable perms then executableFilePermissions 118 | else ordinaryFilePermissions, 119 | entryTime = mtime 120 | } 121 | 122 | -- | Construct a tar 'Entry' based on a local directory (but not its contents). 123 | -- 124 | -- The only attribute of the directory that is used is its modification time. 125 | -- Directory ownership and detailed permissions are not preserved. 126 | -- 127 | packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk 128 | -> TarPath -- ^ Path to use for the tar Entry in the archive 129 | -> IO Entry 130 | packDirectoryEntry filepath tarpath = do 131 | mtime <- getModTime filepath 132 | return (directoryEntry tarpath) { 133 | entryTime = mtime 134 | } 135 | 136 | -- | This is a utility function, much like 'getDirectoryContents'. The 137 | -- difference is that it includes the contents of subdirectories. 138 | -- 139 | -- The paths returned are all relative to the top directory. 
Directory paths 140 | -- are distinguishable by having a trailing path separator 141 | -- (see 'FilePath.Native.hasTrailingPathSeparator'). 142 | -- 143 | -- All directories are listed before the files that they contain. Amongst the 144 | -- contents of a directory, subdirectories are listed after normal files. The 145 | -- overall result is that files within a directory will be together in a single 146 | -- contiguous group. This tends to improve file layout and IO performance when 147 | -- creating or extracting tar archives. 148 | -- 149 | -- * This function returns results lazily. Subdirectories are not scanned 150 | -- until the files entries in the parent directory have been consumed. 151 | -- 152 | getDirectoryContentsRecursive :: FilePath -> IO [FilePath] 153 | getDirectoryContentsRecursive dir0 = 154 | fmap tail (recurseDirectories dir0 [""]) 155 | 156 | recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath] 157 | recurseDirectories _ [] = return [] 158 | recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do 159 | (files, dirs') <- collect [] [] =<< getDirectoryContents (base dir) 160 | 161 | files' <- recurseDirectories base (dirs' ++ dirs) 162 | return (dir : files ++ files') 163 | 164 | where 165 | collect files dirs' [] = return (reverse files, reverse dirs') 166 | collect files dirs' (entry:entries) | ignore entry 167 | = collect files dirs' entries 168 | collect files dirs' (entry:entries) = do 169 | let dirEntry = dir entry 170 | dirEntry' = FilePath.Native.addTrailingPathSeparator dirEntry 171 | isDirectory <- doesDirectoryExist (base dirEntry) 172 | if isDirectory 173 | then collect files (dirEntry':dirs') entries 174 | else collect (dirEntry:files) dirs' entries 175 | 176 | ignore ['.'] = True 177 | ignore ['.', '.'] = True 178 | ignore _ = False 179 | 180 | getModTime :: FilePath -> IO EpochTime 181 | getModTime path = do 182 | #if MIN_VERSION_directory(1,2,0) 183 | -- The directory package switched to the new time package 184 | t 
<- getModificationTime path 185 | return . floor . utcTimeToPOSIXSeconds $ t 186 | #else 187 | (TOD s _) <- getModificationTime path 188 | return $! fromIntegral s 189 | #endif 190 | -------------------------------------------------------------------------------- /htar/htar.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module Main where 3 | 4 | import qualified Codec.Archive.Tar as Tar 5 | import qualified Codec.Archive.Tar.Entry as Tar 6 | 7 | import qualified Codec.Compression.GZip as GZip (compress, decompress) 8 | import qualified Codec.Compression.BZip as BZip (compress, decompress) 9 | 10 | import Control.Exception (throwIO) 11 | import qualified Data.ByteString.Lazy as BS 12 | import Data.ByteString.Lazy (ByteString) 13 | import Data.Bits (testBit) 14 | import Data.Char (toUpper) 15 | import System.Console.GetOpt (OptDescr(..), ArgDescr(..), ArgOrder(..), 16 | getOpt', usageInfo) 17 | import System.Environment (getArgs) 18 | import System.Exit (exitFailure) 19 | import System.IO (hPutStrLn, stderr) 20 | import Data.Time (formatTime) 21 | import Data.Time.Clock.POSIX (posixSecondsToUTCTime) 22 | #if MIN_VERSION_time(1,5,0) 23 | import Data.Time (defaultTimeLocale) 24 | #else 25 | import System.Locale (defaultTimeLocale) 26 | #endif 27 | 28 | main :: IO () 29 | main = do 30 | (opts, files) <- parseOptions =<< getArgs 31 | main' opts files 32 | 33 | main' :: Options -> [FilePath] -> IO () 34 | main' (Options { optFile = file, 35 | optDir = dir, 36 | optAction = action, 37 | optCompression = compression, 38 | optVerbosity = verbosity }) files = 39 | case action of 40 | NoAction -> die ["No action given. Specify one of -c, -t or -x."] 41 | Help -> printUsage 42 | Create -> output . compress compression 43 | . Tar.write =<< Tar.pack dir files 44 | Extract -> Tar.unpack dir . Tar.read . decompress compression =<< input 45 | List -> printEntries . Tar.read . 
decompress compression =<< input 46 | Append | compression /= None 47 | -> die ["Append cannot be used together with compression."] 48 | | file == "-" 49 | -> die ["Append must be used on a file, not stdin/stdout."] 50 | | otherwise 51 | -> Tar.append file dir files 52 | where 53 | input = if file == "-" then BS.getContents else BS.readFile file 54 | output = if file == "-" then BS.putStr else BS.writeFile file 55 | 56 | printEntries = Tar.foldEntries (\entry rest -> printEntry entry >> rest) 57 | (return ()) throwIO 58 | printEntry = putStrLn . entryInfo verbosity 59 | 60 | data Compression = None | GZip | BZip 61 | deriving (Show, Eq) 62 | 63 | compress :: Compression -> ByteString -> ByteString 64 | compress None = id 65 | compress GZip = GZip.compress 66 | compress BZip = BZip.compress 67 | 68 | decompress :: Compression -> ByteString -> ByteString 69 | decompress None = id 70 | decompress GZip = GZip.decompress 71 | decompress BZip = BZip.decompress 72 | 73 | data Verbosity = Verbose | Concise 74 | 75 | ------------------------ 76 | -- List archive contents 77 | 78 | entryInfo :: Verbosity -> Tar.Entry -> String 79 | entryInfo Verbose = detailedInfo 80 | entryInfo Concise = Tar.entryPath 81 | 82 | detailedInfo :: Tar.Entry -> String 83 | detailedInfo entry = 84 | unwords [ typeCode : permissions 85 | , justify 19 (owner ++ '/' : group) size 86 | , time 87 | , name ++ link ] 88 | where 89 | typeCode = case Tar.entryContent entry of 90 | Tar.HardLink _ -> 'h' 91 | Tar.SymbolicLink _ -> 'l' 92 | Tar.CharacterDevice _ _ -> 'c' 93 | Tar.BlockDevice _ _ -> 'b' 94 | Tar.Directory -> 'd' 95 | Tar.NamedPipe -> 'p' 96 | _ -> '-' 97 | permissions = concat [userPerms, groupPerms, otherPerms] 98 | where 99 | userPerms = formatPerms 8 7 6 11 's' 100 | groupPerms = formatPerms 5 4 3 10 's' 101 | otherPerms = formatPerms 2 1 0 9 't' 102 | formatPerms r w x s c = 103 | [if testBit m r then 'r' else '-' 104 | ,if testBit m w then 'w' else '-' 105 | ,if testBit m s 106 | then if 
testBit m x then c else toUpper c 107 | else if testBit m x then 'x' else '-'] 108 | m = Tar.entryPermissions entry 109 | owner = nameOrID ownerName ownerId 110 | group = nameOrID groupName groupId 111 | (Tar.Ownership ownerName groupName ownerId groupId) = 112 | Tar.entryOwnership entry 113 | nameOrID n i = if null n then show i else n 114 | size = case Tar.entryContent entry of 115 | Tar.NormalFile _ fileSize -> show fileSize 116 | _ -> "0" 117 | 118 | time = formatEpochTime "%Y-%m-%d %H:%M" (Tar.entryTime entry) 119 | name = Tar.entryPath entry 120 | link = case Tar.entryContent entry of 121 | Tar.HardLink l -> " link to " ++ Tar.fromLinkTarget l 122 | Tar.SymbolicLink l -> " -> " ++ Tar.fromLinkTarget l 123 | _ -> "" 124 | 125 | justify :: Int -> String -> String -> String 126 | justify width left right = left ++ padding ++ right 127 | where 128 | padding = replicate padWidth ' ' 129 | padWidth = max 1 (width - length left - length right) 130 | 131 | formatEpochTime :: String -> Tar.EpochTime -> String 132 | formatEpochTime f = 133 | formatTime defaultTimeLocale f . posixSecondsToUTCTime . fromIntegral 134 | 135 | ------------------------ 136 | -- Command line handling 137 | 138 | data Options = Options { 139 | optFile :: FilePath, -- "-" means stdin/stdout 140 | optDir :: FilePath, 141 | optAction :: Action, 142 | optCompression :: Compression, 143 | optVerbosity :: Verbosity 144 | } 145 | 146 | defaultOptions :: Options 147 | defaultOptions = Options { 148 | optFile = "-", 149 | optDir = "", 150 | optAction = NoAction, 151 | optCompression = None, 152 | optVerbosity = Concise 153 | } 154 | 155 | data Action = NoAction 156 | | Help 157 | | Create 158 | | Extract 159 | | List 160 | | Append 161 | deriving Show 162 | 163 | optDescr :: [OptDescr (Options -> Options)] 164 | optDescr = 165 | [ Option ['c'] ["create"] 166 | (action Create) 167 | "Create a new archive." 
168 | , Option ['x'] ["extract", "get"] 169 | (action Extract) 170 | "Extract files from an archive." 171 | , Option ['t'] ["list"] 172 | (action List) 173 | "List the contents of an archive." 174 | , Option ['r'] ["append"] 175 | (action Append) 176 | "Append files to the end of an archive." 177 | , Option ['f'] ["file"] 178 | (ReqArg (\f o -> o { optFile = f}) "ARCHIVE") 179 | "Use archive file ARCHIVE." 180 | , Option ['C'] ["directory"] 181 | (ReqArg (\d o -> o { optDir = d }) "DIR") 182 | "Create or extract relative to DIR." 183 | , Option ['z'] ["gzip", "gunzip", "ungzip"] 184 | (compression GZip) 185 | "Use gzip compression." 186 | , Option ['j'] ["bzip2"] 187 | (compression BZip) 188 | "Use bzip2 compression." 189 | , Option ['v'] ["verbose"] 190 | (NoArg (\o -> o { optVerbosity = Verbose })) 191 | "Verbosely list files processed." 192 | , Option ['h', '?'] ["help"] 193 | (action Help) 194 | "Print this help output." 195 | ] 196 | where 197 | action a = NoArg (\o -> o { optAction = a }) 198 | compression c = NoArg (\o -> o { optCompression = c }) 199 | 200 | printUsage :: IO () 201 | printUsage = putStrLn (usageInfo headder optDescr) 202 | where 203 | headder = unlines ["htar creates and extracts TAR archives.", 204 | "", 205 | "Usage: htar [OPTION ...] [FILE ...]"] 206 | 207 | parseOptions :: [String] -> IO (Options, [FilePath]) 208 | parseOptions args = 209 | let (fs, files, nonopts, errors) = getOpt' Permute optDescr args 210 | in case (nonopts, errors) of 211 | ([], []) -> return $ (foldl (flip ($)) defaultOptions fs, files) 212 | (_ , (_:_)) -> die errors 213 | (_ , _) -> die (map (("unrecognized option "++).show) nonopts) 214 | 215 | die :: [String] -> IO a 216 | die errs = do 217 | mapM_ (\e -> hPutStrLn stderr $ "htar: " ++ e) $ errs 218 | hPutStrLn stderr "Try `htar --help' for more information." 
219 | exitFailure 220 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Check.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar 5 | -- Copyright : (c) 2008-2012 Duncan Coutts 6 | -- 2011 Max Bolingbroke 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : duncan@community.haskell.org 10 | -- Portability : portable 11 | -- 12 | -- Perform various checks on tar file entries. 13 | -- 14 | ----------------------------------------------------------------------------- 15 | module Codec.Archive.Tar.Check ( 16 | 17 | -- * Security 18 | checkSecurity, 19 | FileNameError(..), 20 | 21 | -- * Tarbombs 22 | checkTarbomb, 23 | TarBombError(..), 24 | 25 | -- * Portability 26 | checkPortability, 27 | PortabilityError(..), 28 | PortabilityPlatform, 29 | ) where 30 | 31 | import Codec.Archive.Tar.Types 32 | 33 | import Data.Typeable (Typeable) 34 | import Control.Exception (Exception) 35 | import Control.Monad (MonadPlus(mplus)) 36 | import qualified System.FilePath as FilePath.Native 37 | ( splitDirectories, isAbsolute, isValid ) 38 | 39 | import qualified System.FilePath.Windows as FilePath.Windows 40 | import qualified System.FilePath.Posix as FilePath.Posix 41 | 42 | 43 | -------------------------- 44 | -- Security 45 | -- 46 | 47 | -- | This function checks a sequence of tar entries for file name security 48 | -- problems. It checks that: 49 | -- 50 | -- * file paths are not absolute 51 | -- 52 | -- * file paths do not contain any path components that are \"@..@\" 53 | -- 54 | -- * file names are valid 55 | -- 56 | -- These checks are from the perspective of the current OS. That means we check 57 | -- for \"@C:\blah@\" files on Windows and \"\/blah\" files on Unix. 
For archive 58 | -- entry types 'HardLink' and 'SymbolicLink' the same checks are done for the 59 | -- link target. A failure in any entry terminates the sequence of entries with 60 | -- an error. 61 | -- 62 | checkSecurity :: Entries e -> Entries (Either e FileNameError) 63 | checkSecurity = checkEntries checkEntrySecurity 64 | 65 | checkEntrySecurity :: Entry -> Maybe FileNameError 66 | checkEntrySecurity entry = case entryContent entry of 67 | HardLink link -> check (entryPath entry) 68 | `mplus` check (fromLinkTarget link) 69 | SymbolicLink link -> check (entryPath entry) 70 | `mplus` check (fromLinkTarget link) 71 | _ -> check (entryPath entry) 72 | 73 | where 74 | check name 75 | | FilePath.Native.isAbsolute name 76 | = Just $ AbsoluteFileName name 77 | 78 | | not (FilePath.Native.isValid name) 79 | = Just $ InvalidFileName name 80 | 81 | | any (=="..") (FilePath.Native.splitDirectories name) 82 | = Just $ InvalidFileName name 83 | 84 | | otherwise = Nothing 85 | 86 | -- | Errors arising from tar file names being in some way invalid or dangerous 87 | data FileNameError 88 | = InvalidFileName FilePath 89 | | AbsoluteFileName FilePath 90 | deriving (Typeable) 91 | 92 | instance Show FileNameError where 93 | show = showFileNameError Nothing 94 | 95 | instance Exception FileNameError 96 | 97 | showFileNameError :: Maybe PortabilityPlatform -> FileNameError -> String 98 | showFileNameError mb_plat err = case err of 99 | InvalidFileName path -> "Invalid" ++ plat ++ " file name in tar archive: " ++ show path 100 | AbsoluteFileName path -> "Absolute" ++ plat ++ " file name in tar archive: " ++ show path 101 | where plat = maybe "" (' ':) mb_plat 102 | 103 | 104 | -------------------------- 105 | -- Tarbombs 106 | -- 107 | 108 | -- | This function checks a sequence of tar entries for being a \"tar bomb\". 109 | -- This means that the tar file does not follow the standard convention that 110 | -- all entries are within a single subdirectory, e.g. 
a file \"foo.tar\" would 111 | -- usually have all entries within the \"foo/\" subdirectory. 112 | -- 113 | -- Given the expected subdirectory, this function checks all entries are within 114 | -- that subdirectroy. 115 | -- 116 | -- Note: This check must be used in conjunction with 'checkSecurity' 117 | -- (or 'checkPortability'). 118 | -- 119 | checkTarbomb :: FilePath -> Entries e -> Entries (Either e TarBombError) 120 | checkTarbomb expectedTopDir = checkEntries (checkEntryTarbomb expectedTopDir) 121 | 122 | checkEntryTarbomb :: FilePath -> Entry -> Maybe TarBombError 123 | checkEntryTarbomb expectedTopDir entry = 124 | case FilePath.Native.splitDirectories (entryPath entry) of 125 | (topDir:_) | topDir == expectedTopDir -> Nothing 126 | _ -> Just $ TarBombError expectedTopDir 127 | 128 | -- | An error that occurs if a tar file is a \"tar bomb\" that would extract 129 | -- files outside of the intended directory. 130 | data TarBombError = TarBombError FilePath 131 | deriving (Typeable) 132 | 133 | instance Exception TarBombError 134 | 135 | instance Show TarBombError where 136 | show (TarBombError expectedTopDir) 137 | = "File in tar archive is not in the expected directory " ++ show expectedTopDir 138 | 139 | 140 | -------------------------- 141 | -- Portability 142 | -- 143 | 144 | -- | This function checks a sequence of tar entries for a number of portability 145 | -- issues. It will complain if: 146 | -- 147 | -- * The old \"Unix V7\" or \"gnu\" formats are used. For maximum portability 148 | -- only the POSIX standard \"ustar\" format should be used. 149 | -- 150 | -- * A non-portable entry type is used. Only ordinary files, hard links, 151 | -- symlinks and directories are portable. Device files, pipes and others are 152 | -- not portable between all common operating systems. 153 | -- 154 | -- * Non-ASCII characters are used in file names. 
There is no agreed portable 155 | -- convention for Unicode or other extended character sets in file names in 156 | -- tar archives. 157 | -- 158 | -- * File names that would not be portable to both Unix and Windows. This check 159 | -- includes characters that are valid in both systems and the \'/\' vs \'\\\' 160 | -- directory separator conventions. 161 | -- 162 | checkPortability :: Entries e -> Entries (Either e PortabilityError) 163 | checkPortability = checkEntries checkEntryPortability 164 | 165 | checkEntryPortability :: Entry -> Maybe PortabilityError 166 | checkEntryPortability entry 167 | | entryFormat entry `elem` [V7Format, GnuFormat] 168 | = Just $ NonPortableFormat (entryFormat entry) 169 | 170 | | not (portableFileType (entryContent entry)) 171 | = Just NonPortableFileType 172 | 173 | | not (all portableChar posixPath) 174 | = Just $ NonPortableEntryNameChar posixPath 175 | 176 | | not (FilePath.Posix.isValid posixPath) 177 | = Just $ NonPortableFileName "unix" (InvalidFileName posixPath) 178 | | not (FilePath.Windows.isValid windowsPath) 179 | = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath) 180 | 181 | | FilePath.Posix.isAbsolute posixPath 182 | = Just $ NonPortableFileName "unix" (AbsoluteFileName posixPath) 183 | | FilePath.Windows.isAbsolute windowsPath 184 | = Just $ NonPortableFileName "windows" (AbsoluteFileName windowsPath) 185 | 186 | | any (=="..") (FilePath.Posix.splitDirectories posixPath) 187 | = Just $ NonPortableFileName "unix" (InvalidFileName posixPath) 188 | | any (=="..") (FilePath.Windows.splitDirectories windowsPath) 189 | = Just $ NonPortableFileName "windows" (InvalidFileName windowsPath) 190 | 191 | | otherwise = Nothing 192 | 193 | where 194 | tarPath = entryTarPath entry 195 | posixPath = fromTarPathToPosixPath tarPath 196 | windowsPath = fromTarPathToWindowsPath tarPath 197 | 198 | portableFileType ftype = case ftype of 199 | NormalFile {} -> True 200 | HardLink {} -> True 201 | SymbolicLink {} -> True 
202 | Directory -> True 203 | _ -> False 204 | 205 | portableChar c = c <= '\127' 206 | 207 | -- | Portability problems in a tar archive 208 | data PortabilityError 209 | = NonPortableFormat Format 210 | | NonPortableFileType 211 | | NonPortableEntryNameChar FilePath 212 | | NonPortableFileName PortabilityPlatform FileNameError 213 | deriving (Typeable) 214 | 215 | -- | The name of a platform that portability issues arise from 216 | type PortabilityPlatform = String 217 | 218 | instance Exception PortabilityError 219 | 220 | instance Show PortabilityError where 221 | show (NonPortableFormat format) = "Archive is in the " ++ fmt ++ " format" 222 | where fmt = case format of V7Format -> "old Unix V7 tar" 223 | UstarFormat -> "ustar" -- I never generate this but a user might 224 | GnuFormat -> "GNU tar" 225 | show NonPortableFileType = "Non-portable file type in archive" 226 | show (NonPortableEntryNameChar posixPath) 227 | = "Non-portable character in archive entry name: " ++ show posixPath 228 | show (NonPortableFileName platform err) 229 | = showFileNameError (Just platform) err 230 | 231 | 232 | -------------------------- 233 | -- Utils 234 | -- 235 | 236 | checkEntries :: (Entry -> Maybe e') -> Entries e -> Entries (Either e e') 237 | checkEntries checkEntry = 238 | mapEntries (\entry -> maybe (Right entry) Left (checkEntry entry)) 239 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Read.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, DeriveDataTypeable, BangPatterns #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar.Read 5 | -- Copyright : (c) 2007 Bjorn Bringert, 6 | -- 2008 Andrea Vezzosi, 7 | -- 2008-2009 Duncan Coutts, 8 | -- 2011 Max Bolingbroke 9 | -- License : BSD3 10 | -- 11 | -- Maintainer : duncan@community.haskell.org 12 | -- Portability : portable 13 | -- 14 | 
----------------------------------------------------------------------------- 15 | module Codec.Archive.Tar.Read (read, FormatError(..)) where 16 | 17 | import Codec.Archive.Tar.Types 18 | 19 | import Data.Char (ord) 20 | import Data.Int (Int64) 21 | import Data.Bits (Bits(shiftL)) 22 | import Control.Exception (Exception(..)) 23 | import Data.Typeable (Typeable) 24 | import Control.Applicative 25 | import Control.Monad 26 | import Control.DeepSeq 27 | 28 | import qualified Data.ByteString as BS 29 | import qualified Data.ByteString.Char8 as BS.Char8 30 | import qualified Data.ByteString.Unsafe as BS 31 | import qualified Data.ByteString.Lazy as LBS 32 | 33 | import Prelude hiding (read) 34 | 35 | #if !MIN_VERSION_bytestring(0,10,0) 36 | import Data.Monoid (Monoid(..)) 37 | import qualified Data.ByteString.Lazy.Internal as LBS 38 | #endif 39 | 40 | -- | Errors that can be encountered when parsing a Tar archive. 41 | data FormatError 42 | = TruncatedArchive 43 | | ShortTrailer 44 | | BadTrailer 45 | | TrailingJunk 46 | | ChecksumIncorrect 47 | | NotTarFormat 48 | | UnrecognisedTarFormat 49 | | HeaderBadNumericEncoding 50 | #if MIN_VERSION_base(4,8,0) 51 | deriving (Eq, Show, Typeable) 52 | 53 | instance Exception FormatError where 54 | displayException TruncatedArchive = "truncated tar archive" 55 | displayException ShortTrailer = "short tar trailer" 56 | displayException BadTrailer = "bad tar trailer" 57 | displayException TrailingJunk = "tar file has trailing junk" 58 | displayException ChecksumIncorrect = "tar checksum error" 59 | displayException NotTarFormat = "data is not in tar format" 60 | displayException UnrecognisedTarFormat = "tar entry not in a recognised format" 61 | displayException HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)" 62 | #else 63 | deriving (Eq, Typeable) 64 | 65 | instance Show FormatError where 66 | show TruncatedArchive = "truncated tar archive" 67 | show ShortTrailer = "short tar trailer" 68 | show 
BadTrailer = "bad tar trailer" 69 | show TrailingJunk = "tar file has trailing junk" 70 | show ChecksumIncorrect = "tar checksum error" 71 | show NotTarFormat = "data is not in tar format" 72 | show UnrecognisedTarFormat = "tar entry not in a recognised format" 73 | show HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)" 74 | 75 | instance Exception FormatError 76 | #endif 77 | 78 | instance NFData FormatError where 79 | rnf !_ = () -- enumerations are fully strict by construction 80 | 81 | -- | Convert a data stream in the tar file format into an internal data 82 | -- structure. Decoding errors are reported by the 'Fail' constructor of the 83 | -- 'Entries' type. 84 | -- 85 | -- * The conversion is done lazily. 86 | -- 87 | read :: LBS.ByteString -> Entries FormatError 88 | read = unfoldEntries getEntry 89 | 90 | getEntry :: LBS.ByteString -> Either FormatError (Maybe (Entry, LBS.ByteString)) 91 | getEntry bs 92 | | BS.length header < 512 = Left TruncatedArchive 93 | 94 | -- Tar files end with at least two blocks of all '0'. Checking this serves 95 | -- two purposes. It checks the format but also forces the tail of the data 96 | -- which is necessary to close the file if it came from a lazily read file. 
  -- A tar archive ends with two 512-byte blocks of NULs, so a NUL in the
  -- first byte of a header block signals the end-of-archive trailer.
  | LBS.head bs == 0 = case LBS.splitAt 1024 bs of
      (end, trailing)
        | LBS.length end /= 1024        -> Left ShortTrailer
        | not (LBS.all (== 0) end)      -> Left BadTrailer
        | not (LBS.all (== 0) trailing) -> Left TrailingJunk
        | otherwise                     -> Right Nothing

  | otherwise  = partial $ do

  -- If the checksum field parsed but does not match, the archive is corrupt;
  -- if neither the checksum nor the format is recognisable, it is most
  -- likely not a tar file at all.
  case (chksum_, format_) of
    (Ok chksum, _   ) | correctChecksum header chksum -> return ()
    (Ok _,      Ok _) -> Error ChecksumIncorrect
    _                 -> Error NotTarFormat

  -- These fields are partial, have to check them
  format   <- format_;   mode     <- mode_;
  uid      <- uid_;      gid      <- gid_;
  size     <- size_;     mtime    <- mtime_;
  devmajor <- devmajor_; devminor <- devminor_;

  let content = LBS.take size (LBS.drop 512 bs)
      -- entry data is NUL-padded up to a whole number of 512-byte blocks
      padding = (512 - size) `mod` 512
      bs'     = LBS.drop (512 + size + padding) bs

      entry = Entry {
        entryTarPath     = TarPath name prefix,
        entryContent     = case typecode of
                   '\0' -> NormalFile      content size
                   '0'  -> NormalFile      content size
                   '1'  -> HardLink        (LinkTarget linkname)
                   '2'  -> SymbolicLink    (LinkTarget linkname)
                   -- NOTE: this guard is deliberately placed before the
                   -- '3'..'7' cases: the old V7 format does not define those
                   -- typecodes, so for V7 archives they are passed through
                   -- as 'OtherEntryType'.
                   _ | format == V7Format
                        -> OtherEntryType  typecode content size
                   '3'  -> CharacterDevice devmajor devminor
                   '4'  -> BlockDevice     devmajor devminor
                   '5'  -> Directory
                   '6'  -> NamedPipe
                   '7'  -> NormalFile      content size
                   _    -> OtherEntryType  typecode content size,
        entryPermissions = mode,
        entryOwnership   = Ownership (BS.Char8.unpack uname)
                                     (BS.Char8.unpack gname) uid gid,
        entryTime        = mtime,
        entryFormat      = format
      }

  return (Just (entry, bs'))

  where
    -- The header is exactly the first 512 bytes, converted to a strict
    -- ByteString so the field accessors below can index into it cheaply.
#if MIN_VERSION_bytestring(0,10,0)
    header = LBS.toStrict (LBS.take 512 bs)
#else
    header = toStrict (LBS.take 512 bs)
    toStrict = LBS.foldrChunks mappend mempty
#endif

    -- Field offsets and widths follow the POSIX ustar header layout.
    name       = getString   0 100 header
    mode_      = getOct    100   8 header
    uid_       = getOct    108   8 header
    gid_       = getOct    116   8 header
    size_      = getOct    124  12 header
    mtime_     = getOct    136  12 header
    chksum_    = getOct    148   8 header
    typecode   = getByte   156     header
    linkname   = getString 157 100 header
    magic      = getChars  257   8 header
    uname      = getString 265  32 header
    gname      = getString 297  32 header
    devmajor_  = getOct    329   8 header
    devminor_  = getOct    337   8 header
    prefix     = getString 345 155 header
--  trailing   = getBytes  500  12 header

    -- The magic field distinguishes the three supported header variants.
    format_
      | magic == ustarMagic = return UstarFormat
      | magic == gnuMagic   = return GnuFormat
      | magic == v7Magic    = return V7Format
      | otherwise           = Error UnrecognisedTarFormat

v7Magic, ustarMagic, gnuMagic :: BS.ByteString
v7Magic    = BS.Char8.pack "\0\0\0\0\0\0\0\0"
ustarMagic = BS.Char8.pack "ustar\NUL00"
gnuMagic   = BS.Char8.pack "ustar \NUL"

-- | Verify the simple additive checksum of a 512-byte header block.
correctChecksum :: BS.ByteString -> Int -> Bool
correctChecksum header checksum = checksum == checksum'
  where
    -- sum of all 512 bytes in the header block,
    -- treating each byte as an 8-bit unsigned value
    sumchars  = BS.foldl' (\x y -> x + fromIntegral y) 0
    -- treating the 8 bytes of chksum as blank characters.
    checksum' = sumchars (BS.take 148 header)
              + 256 -- 256 = sumchars (BS.Char8.replicate 8 ' ')
              + sumchars (BS.drop 156 header)

-- * TAR format primitive input

-- | Read a numeric header field: leading spaces are skipped and the value
-- ends at the first NUL or space.
{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int   #-}
{-# SPECIALISE getOct :: Int -> Int -> BS.ByteString -> Partial FormatError Int64 #-}
getOct :: (Integral a, Bits a) => Int -> Int -> BS.ByteString -> Partial FormatError a
getOct off len = parseOct
               . BS.Char8.takeWhile (\c -> c /= '\NUL' && c /= ' ')
               . BS.Char8.dropWhile (== ' ')
               . getBytes off len
  where
    parseOct s | BS.null s = return 0
    -- As a star extension, octal fields can hold a base-256 value if the high
    -- bit of the initial character is set. The initial character can be:
    --   0x80 ==> trailing characters hold a positive base-256 value
    --   0xFF ==> trailing characters hold a negative base-256 value
    --
    -- In both cases, there won't be a trailing NUL/space.
    --
    -- GNU tar seems to contain a half-implementation of code that deals with
    -- extra bits in the first character, but I don't think it works and the
    -- docs I can find on star seem to suggest that these will always be 0,
    -- which is what I will assume.
    parseOct s | BS.head s == 128 = return (readBytes (BS.tail s))
               | BS.head s == 255 = return (negate (readBytes (BS.tail s)))
    parseOct s = case readOct s of
      Just x  -> return x
      Nothing -> Error HeaderBadNumericEncoding

    -- big-endian base-256 interpretation of the remaining bytes
    readBytes :: (Integral a, Bits a) => BS.ByteString -> a
    readBytes = BS.foldl' (\acc x -> acc `shiftL` 8 + fromIntegral x) 0

-- | A fixed-width field of raw bytes.
getBytes :: Int -> Int -> BS.ByteString -> BS.ByteString
getBytes off len = BS.take len . BS.drop off

-- | A single byte field, viewed as a 'Char'.
getByte :: Int -> BS.ByteString -> Char
getByte off bs = BS.Char8.index bs off

-- | A fixed-width character field (not NUL-terminated).
getChars :: Int -> Int -> BS.ByteString -> BS.ByteString
getChars off len = getBytes off len

-- | A NUL-terminated string field. The result is copied so it does not
-- retain the whole 512-byte header block.
getString :: Int -> Int -> BS.ByteString -> BS.ByteString
getString off len = BS.copy . BS.Char8.takeWhile (/='\0') . getBytes off len

-- These days we'd just use Either, but in older versions of base there was no
-- Monad instance for Either, it was in mtl with an annoying Error constraint.
--
data Partial e a = Error e | Ok a

-- | Interpret a 'Partial' result as a standard 'Either'.
partial :: Partial e a -> Either e a
partial (Error msg) = Left msg
partial (Ok x)      = Right x

instance Functor (Partial e) where
    fmap = liftM

instance Applicative (Partial e) where
    pure  = Ok
    (<*>) = ap

instance Monad (Partial e) where
    return = pure
    Error m >>= _ = Error m
    Ok    x >>= k = k x
    -- NOTE: the former definition @fail = error "fail @(Partial e)"@ has been
    -- removed: 'fail' stopped being a 'Monad' method in base-4.13 (GHC 8.8),
    -- so defining it here is a compile error on modern GHC. No do-block in
    -- this module uses a failable pattern, so no 'MonadFail' instance is
    -- needed either.

-- | Parse an unsigned octal number from the ASCII digits of a header field.
-- Returns 'Nothing' for an empty field or on any non-octal-digit byte.
--
-- Fix: the previous upper bound of @0x39@ ('9') accepted the non-octal
-- digits '8' and '9' while still multiplying by 8, silently producing wrong
-- values (e.g. \"19\" parsed as 17); such fields are now rejected so the
-- caller reports 'HeaderBadNumericEncoding' instead.
{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int   #-}
{-# SPECIALISE readOct :: BS.ByteString -> Maybe Int64 #-}
readOct :: Integral n => BS.ByteString -> Maybe n
readOct bs0 = case go 0 0 bs0 of
    -1 -> Nothing   -- sentinel is safe: octal digits never yield a negative
    n  -> Just n
  where
    go :: Integral n => Int -> n -> BS.ByteString -> n
    go !i !n !bs
      | BS.null bs = if i == 0 then -1 else n
      | otherwise  =
          case BS.unsafeHead bs of
            w | w >= 0x30
             && w <= 0x37 -> go (i+1)
                                (n * 8 + (fromIntegral w - 0x30))
                                (BS.unsafeTail bs)
              | otherwise -> -1
-------------------------------------------------------------------------------- /Codec/Archive/Tar.hs: --------------------------------------------------------------------------------
{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Codec.Archive.Tar
-- Copyright   :  (c) 2007 Bjorn Bringert,
--                    2008 Andrea Vezzosi,
--                    2008-2012 Duncan Coutts
-- License     :  BSD3
--
-- Maintainer  :  duncan@community.haskell.org
-- Portability :  portable
--
-- Reading, writing and manipulating \"@.tar@\" archive files.
--
-- This module uses common names and so is designed to be imported qualified:
--
-- > import qualified Codec.Archive.Tar as Tar
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar (

  -- | Tar archive files are used to store a collection of other files in a
  -- single file. They consist of a sequence of entries. Each entry describes
  -- a file or directory (or some other special kind of file). The entry stores
  -- a little bit of meta-data, in particular the file or directory name.
  --
  -- Unlike some other archive formats, a tar file contains no index. The
  -- information about each entry is stored next to the entry. Because of this,
  -- tar files are almost always processed linearly rather than in a
  -- random-access fashion.
  --
  -- The functions in this package are designed for working on tar files
  -- linearly and lazily. This makes it possible to do many operations in
  -- constant space rather than having to load the entire archive into memory.
  --
  -- It can read and write standard POSIX tar files and also the GNU and old
  -- Unix V7 tar formats. The convenience functions that are provided in the
  -- "Codec.Archive.Tar.Entry" module for creating archive entries are
  -- primarily designed for standard portable archives. If you need to
  -- construct GNU format archives or exactly preserve file ownership and
  -- permissions then you will need to write some extra helper functions.
  --
  -- This module contains just the simple high level operations without
  -- exposing all the details of tar files. If you need to inspect tar
  -- entries in more detail or construct them directly then you also need
  -- the module "Codec.Archive.Tar.Entry".

  -- * High level \"all in one\" operations
  create,
  extract,
  append,

  -- * Notes
  -- ** Compressed tar archives
  -- | Tar files are commonly used in conjunction with gzip compression, as in
  -- \"@.tar.gz@\" or \"@.tar.bz2@\" files. This module does not directly
  -- handle compressed tar files however they can be handled easily by
  -- composing functions from this module and the modules
  -- @Codec.Compression.GZip@ or @Codec.Compression.BZip@
  -- (see @zlib@ or @bzlib@ packages).
  --
  -- Creating a compressed \"@.tar.gz@\" file is just a minor variation on the
  -- 'create' function, but where we throw compression into the pipeline:
  --
  -- > BS.writeFile tar . GZip.compress . Tar.write =<< Tar.pack base dir
  --
  -- Similarly, extracting a compressed \"@.tar.gz@\" is just a minor variation
  -- on the 'extract' function where we use decompression in the pipeline:
  --
  -- > Tar.unpack dir . Tar.read . GZip.decompress =<< BS.readFile tar
  --

  -- ** Security
  -- | This is pretty important. A maliciously constructed tar archive could
  -- contain entries that specify bad file names. It could specify absolute
  -- file names like \"@\/etc\/passwd@\" or relative files outside of the
  -- archive like \"..\/..\/..\/something\". This security problem is commonly
  -- called a \"directory traversal vulnerability\". Historically, such
  -- vulnerabilities have been common in packages handling tar archives.
  --
  -- The 'extract' and 'unpack' functions check for bad file names. See the
  -- 'checkSecurity' function for more details. If you need to do any custom
  -- unpacking then you should use this.

  -- ** Tarbombs
  -- | A \"tarbomb\" is a @.tar@ file where not all entries are in a
  -- subdirectory but instead files extract into the top level directory. The
  -- 'extract' function does not check for these however if you want to do
  -- that you can use the 'checkTarbomb' function like so:
  --
  -- > Tar.unpack dir . Tar.checkTarbomb expectedDir
  -- >                . Tar.read =<< BS.readFile tar
  --
  -- In this case extraction will fail if any file is outside of @expectedDir@.

  -- * Converting between internal and external representation
  -- | Note, you cannot expect @write . read@ to give exactly the same output
  -- as input. You can expect the information to be preserved exactly however.
  -- This is because 'read' accepts common format variations while 'write'
  -- produces the standard format.
  read,
  write,

  -- * Packing and unpacking files to\/from internal representation
  -- | These functions are for packing and unpacking portable archives. They
  -- are not suitable in cases where it is important to preserve file ownership
  -- and permissions or to archive special files like named pipes and Unix
  -- device files.
  pack,
  unpack,

  -- * Types
  -- ** Tar entry type
  -- | This module provides only very simple and limited read-only access to
  -- the 'Entry' type. If you need access to the details or if you need to
  -- construct your own entries then also import "Codec.Archive.Tar.Entry".
  Entry,
  entryPath,
  entryContent,
  EntryContent(..),

  -- ** Sequences of tar entries
  Entries(..),
  mapEntries,
  mapEntriesNoFail,
  foldEntries,
  foldlEntries,
  unfoldEntries,

  -- * Error handling
  -- | Reading tar files can fail if the data does not match the tar file
  -- format correctly.
  --
  -- The style of error handling is by returning structured errors. The pure
  -- functions in the library do not throw exceptions, they return the errors
  -- as data. The IO actions in the library can throw exceptions, in particular
  -- the 'unpack' action does this. All the error types used are an instance of
  -- the standard 'Exception' class so it is possible to 'throw' and 'catch'
  -- them.

  -- ** Errors from reading tar files
  FormatError(..),

#ifdef TESTS
  prop_write_read_ustar,
  prop_write_read_gnu,
  prop_write_read_v7,
#endif
  ) where

import Codec.Archive.Tar.Types

import Codec.Archive.Tar.Read
import Codec.Archive.Tar.Write

import Codec.Archive.Tar.Pack
import Codec.Archive.Tar.Unpack
import Codec.Archive.Tar.Index (hSeekEndEntryOffset)

import Codec.Archive.Tar.Check

import Control.Exception (Exception, throw, catch)
import qualified Data.ByteString.Lazy as BS
import System.IO (withFile, IOMode(..))
import Prelude hiding (read)

-- | Create a new @\".tar\"@ file from a directory of files.
--
-- It is equivalent to calling the standard @tar@ program like so:
--
-- @$ tar -f tarball.tar -C base -c dir@
--
-- This assumes a directory @.\/base\/dir@ with files inside, eg
-- @.\/base\/dir\/foo.txt@. The file names inside the resulting tar file will be
-- relative to @dir@, eg @dir\/foo.txt@.
--
-- This is a high level \"all in one\" operation. Since you may need variations
-- on this function it is instructive to see how it is written. It is just:
--
-- > BS.writeFile tar . Tar.write =<< Tar.pack base paths
--
-- Notes:
--
-- The files and directories must not change during this operation or the
-- result is not well defined.
--
-- The intention of this function is to create tarballs that are portable
-- It is /not/ suitable for doing file system backups because
-- file ownership and permissions are not fully preserved. File ownership is
-- not preserved at all. File permissions are set to simple portable values:
--
-- * @rw-r--r--@ for normal files
--
-- * @rwxr-xr-x@ for executable files
--
-- * @rwxr-xr-x@ for directories
--
create :: FilePath   -- ^ Path of the \".tar\" file to write.
       -> FilePath   -- ^ Base directory
       -> [FilePath] -- ^ Files and directories to archive, relative to base dir
       -> IO ()
create tar base paths = BS.writeFile tar . write =<< pack base paths

-- | Extract all the files contained in a @\".tar\"@ file.
--
-- It is equivalent to calling the standard @tar@ program like so:
--
-- @$ tar -x -f tarball.tar -C dir@
--
-- So for example if the @tarball.tar@ file contains @foo\/bar.txt@ then this
-- will extract it to @dir\/foo\/bar.txt@.
--
-- This is a high level \"all in one\" operation. Since you may need variations
-- on this function it is instructive to see how it is written. It is just:
--
-- > Tar.unpack dir . Tar.read =<< BS.readFile tar
--
-- Notes:
--
-- Extracting can fail for a number of reasons. The tarball may be incorrectly
-- formatted. There may be IO or permission errors. In such cases an exception
-- will be thrown and extraction will not continue.
--
-- Since the extraction may fail part way through it is not atomic. For this
-- reason you may want to extract into an empty directory and, if the
-- extraction fails, recursively delete the directory.
--
-- Security: only files inside the target directory will be written. Tarballs
-- containing entries that point outside of the tarball (either absolute paths
-- or relative paths) will be caught and an exception will be thrown.
--
extract :: FilePath -- ^ Destination directory
        -> FilePath -- ^ Tarball
        -> IO ()
extract dir tar = unpack dir . read =<< BS.readFile tar

-- | Append new entries to a @\".tar\"@ file from a directory of files.
--
-- This is much like 'create', except that all the entries are added to the
-- end of an existing tar file. Or if the file does not already exist then
-- it behaves the same as 'create'.
--
append :: FilePath   -- ^ Path of the \".tar\" file to write.
       -> FilePath   -- ^ Base directory
       -> [FilePath] -- ^ Files and directories to archive, relative to base dir
       -> IO ()
append tar base paths =
    withFile tar ReadWriteMode $ \hnd -> do
      -- position the handle just before the end-of-archive trailer so the
      -- new entries splice onto the existing sequence
      _ <- hSeekEndEntryOffset hnd Nothing
      BS.hPut hnd . write =<< pack base paths

-------------------------
-- Correctness properties
--

#ifdef TESTS

prop_write_read_ustar :: [Entry] -> Bool
prop_write_read_ustar entries =
    foldr Next Done entries' == read (write entries')
  where
    entries' = [ e { entryFormat = UstarFormat } | e <- entries ]

prop_write_read_gnu :: [Entry] -> Bool
prop_write_read_gnu entries =
    foldr Next Done entries' == read (write entries')
  where
    entries' = [ e { entryFormat = GnuFormat } | e <- entries ]

prop_write_read_v7 :: [Entry] -> Bool
prop_write_read_v7 entries =
    foldr Next Done entries' == read (write entries')
  where
    entries' = [ limitToV7FormatCompat e { entryFormat = V7Format }
               | e <- entries ]

#endif
-------------------------------------------------------------------------------- /Codec/Archive/Tar/Index/StringTable.hs: --------------------------------------------------------------------------------
{-# LANGUAGE CPP, BangPatterns, PatternGuards, DeriveDataTypeable #-}

module Codec.Archive.Tar.Index.StringTable (
  StringTable,
  lookup,
  index,
  construct,

  StringTableBuilder,
  empty,
  insert,
  inserts,
  finalise,
  unfinalise,

  serialise,
  serialiseSize,
  deserialiseV1,
  deserialiseV2,

#ifdef TESTS
  prop_valid,
  prop_sorted,
  prop_finalise_unfinalise,
  prop_serialise_deserialise,
  prop_serialiseSize,
#endif
 ) where

import Data.Typeable (Typeable)

import Prelude   hiding (lookup, id)
import Data.List hiding (lookup, insert)
import Data.Function (on)
import Data.Word (Word32)
import Data.Int  (Int32)
import Data.Bits
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import Control.Exception (assert)

import qualified Data.Array.Unboxed as A
import Data.Array.Unboxed ((!))
#if MIN_VERSION_containers(0,5,0)
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
#else
import qualified Data.Map as Map
import Data.Map (Map)
#endif
import qualified Data.ByteString        as BS
import qualified Data.ByteString.Unsafe as BS
import qualified Data.ByteString.Lazy   as LBS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder          as BS
import Data.ByteString.Builder.Extra    as BS (byteStringCopy)
#else
import Data.ByteString.Lazy.Builder        as BS
import Data.ByteString.Lazy.Builder.Extras as BS (byteStringCopy)
#endif


-- | An efficient mapping from strings to a dense set of integers.
--
data StringTable id = StringTable
         {-# UNPACK #-} !BS.ByteString           -- all strings concatenated
         {-# UNPACK #-} !(A.UArray Int32 Word32) -- string offset table
         {-# UNPACK #-} !(A.UArray Int32 Int32)  -- string index to id table
         {-# UNPACK #-} !(A.UArray Int32 Int32)  -- string id to index table
  deriving (Show, Typeable)

instance (Eq id, Enum id) => Eq (StringTable id) where
  -- compare via the builder form; the array representation is an encoding
  -- detail
  tbl1 == tbl2 = unfinalise tbl1 == unfinalise tbl2

-- | Look up a string in the token table. If the string is present, return
-- its corresponding index.
--
lookup :: Enum id => StringTable id -> BS.ByteString -> Maybe id
lookup (StringTable bs offsets ids _ixs) str =
    binarySearch 0 (topBound-1) str
  where
    (0, topBound) = A.bounds offsets

    -- binary search works because the stored strings are sorted,
    -- see prop_sorted
    binarySearch !a !b !key
      | a > b     = Nothing
      | otherwise = case compare key (index' bs offsets mid) of
          LT -> binarySearch a (mid-1) key
          EQ -> Just $! toEnum (fromIntegral (ids ! mid))
          GT -> binarySearch (mid+1) b key
      where mid = (a + b) `div` 2

-- | Slice the string at a given position out of the concatenated buffer,
-- using the offset table to find its bounds.
index' :: BS.ByteString -> A.UArray Int32 Word32 -> Int32 -> BS.ByteString
index' bs offsets i = BS.unsafeTake len . BS.unsafeDrop start $ bs
  where
    start, end, len :: Int
    start = fromIntegral (offsets ! i)
    end   = fromIntegral (offsets ! (i+1))
    len   = end - start


-- | Given the index of a string in the table, return the string.
--
index :: Enum id => StringTable id -> id -> BS.ByteString
index (StringTable bs offsets _ids ixs) =
    index' bs offsets . (ixs !) . fromIntegral . fromEnum


-- | Given a list of strings, construct a 'StringTable' mapping those strings
-- to a dense set of integers. Also return the ids for all the strings used
-- in the construction.
--
construct :: Enum id => [BS.ByteString] -> StringTable id
construct = finalise . foldl' (\tbl s -> fst (insert s tbl)) empty


data StringTableBuilder id = StringTableBuilder
                              !(Map BS.ByteString id)
                              {-# UNPACK #-} !Word32   -- next unused id
  deriving (Eq, Show, Typeable)

empty :: StringTableBuilder id
empty = StringTableBuilder Map.empty 0

-- | Insert a string and return its id. Inserting a string that is already
-- present returns the existing id and leaves the builder unchanged.
insert :: Enum id => BS.ByteString -> StringTableBuilder id -> (StringTableBuilder id, id)
insert str builder@(StringTableBuilder smap nextid) =
    case Map.lookup str smap of
      Just id -> (builder, id)
      Nothing -> let !id    = toEnum (fromIntegral nextid)
                     !smap' = Map.insert str id smap
                  in (StringTableBuilder smap' (nextid+1), id)

inserts :: Enum id => [BS.ByteString] -> StringTableBuilder id -> (StringTableBuilder id, [id])
inserts bss builder = mapAccumL (flip insert) builder bss

-- | Freeze a builder into the compact array representation.
finalise :: Enum id => StringTableBuilder id -> StringTable id
finalise (StringTableBuilder smap _) =
    (StringTable strs offsets ids ixs)
  where
    strs    = BS.concat (Map.keys smap)
    -- offsets has one more element than there are strings: it also carries
    -- the end offset of the final string
    offsets = A.listArray (0, fromIntegral (Map.size smap))
            . scanl (\off str -> off + fromIntegral (BS.length str)) 0
            $ Map.keys smap
    ids     = A.listArray (0, fromIntegral (Map.size smap) - 1)
            . map (fromIntegral . fromEnum)
            $ Map.elems smap
    ixs     = A.array (A.bounds ids) [ (id,ix) | (ix,id) <- A.assocs ids ]

-- | The inverse of 'finalise': recover a builder so that further strings
-- can be inserted into an existing table.
unfinalise :: Enum id => StringTable id -> StringTableBuilder id
unfinalise (StringTable strs offsets ids _) =
    StringTableBuilder smap nextid
  where
    -- fromAscList is valid because the stored strings are sorted
    smap   = Map.fromAscList
               [ (index' strs offsets ix, toEnum (fromIntegral (ids ! ix)))
               | ix <- [0..h] ]
    (0,h)  = A.bounds ids
    nextid = fromIntegral (h+1)


-------------------------
-- (de)serialisation
--

serialise :: StringTable id -> BS.Builder
serialise (StringTable strs offs ids ixs) =
    let (_, !ixEnd) = A.bounds offs in

    BS.word32BE (fromIntegral (BS.length strs))
 <> BS.word32BE (fromIntegral ixEnd + 1)
 <> BS.byteStringCopy strs
 <> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems offs)
 <> foldr (\n r -> BS.int32BE  n <> r) mempty (A.elems ids)
 <> foldr (\n r -> BS.int32BE  n <> r) mempty (A.elems ixs)

-- | The exact number of bytes that 'serialise' will produce,
-- see prop_serialiseSize.
serialiseSize :: StringTable id -> Int
serialiseSize (StringTable strs offs _ids _ixs) =
    let (_, !ixEnd) = A.bounds offs
     in 4 * 2
      + BS.length strs
      + 4 * (fromIntegral ixEnd + 1)
      + 8 *  fromIntegral ixEnd

-- | Decode the old V1 format: a single offsets array with identity
-- index\/id mappings.
deserialiseV1 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
deserialiseV1 bs
  | BS.length bs >= 8
  , let lenStrs = fromIntegral (readWord32BE bs 0)
        lenArr  = fromIntegral (readWord32BE bs 4)
        lenTotal= 8 + lenStrs + 4 * lenArr
  , BS.length bs >= lenTotal
  , let strs = BS.take lenStrs (BS.drop 8 bs)
        arr  = A.array (0, fromIntegral lenArr - 1)
                       [ (i, readWord32BE bs off)
                       | (i, off) <- zip [0 .. fromIntegral lenArr - 1]
                                         [offArrS,offArrS+4 .. offArrE]
                       ]
        ids  = A.array (0, fromIntegral lenArr - 1)
                       [ (i,i) | i <- [0 .. fromIntegral lenArr - 1] ]
        ixs  = ids -- two identity mappings
        offArrS = 8 + lenStrs
        offArrE = offArrS + 4 * lenArr - 1
        !stringTable = StringTable strs arr ids ixs
        !bs'         = BS.drop lenTotal bs
  = Just (stringTable, bs')

  | otherwise
  = Nothing

-- | Decode the V2 format, which carries all three arrays explicitly.
deserialiseV2 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
deserialiseV2 bs
  | BS.length bs >= 8
  , let lenStrs = fromIntegral (readWord32BE bs 0)
        lenArr  = fromIntegral (readWord32BE bs 4)
        lenTotal= 8                     -- the two length prefixes
                + lenStrs
                + 4 * lenArr
                +(4 * (lenArr - 1)) * 2 -- offsets array is 1 longer
  , BS.length bs >= lenTotal
  , let strs = BS.take lenStrs (BS.drop 8 bs)
        offs = A.listArray (0, fromIntegral lenArr - 1)
                 [ readWord32BE bs off
                 | off <- offsets offsOff ]
        -- the second two arrays are 1 shorter
        ids  = A.listArray (0, fromIntegral lenArr - 2)
                 [ readInt32BE bs off
                 | off <- offsets idsOff ]
        ixs  = A.listArray (0, fromIntegral lenArr - 2)
                 [ readInt32BE bs off
                 | off <- offsets ixsOff ]
        offsOff = 8 + lenStrs
        idsOff  = offsOff + 4 * lenArr
        ixsOff  = idsOff  + 4 * (lenArr-1)
        offsets from = [from,from+4 .. from + 4 * (lenArr - 1)]
        !stringTable = StringTable strs offs ids ixs
        !bs'         = BS.drop lenTotal bs
  = Just (stringTable, bs')

  | otherwise
  = Nothing

readInt32BE :: BS.ByteString -> Int -> Int32
readInt32BE bs i = fromIntegral (readWord32BE bs i)

-- | Read a big-endian 'Word32' at the given byte offset.
readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE bs i =
    assert (i >= 0 && i+3 <= BS.length bs - 1) $
    fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
  + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
  + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
  + fromIntegral (BS.unsafeIndex bs (i + 3))

#ifdef TESTS

prop_valid :: [BS.ByteString] -> Bool
prop_valid strs =
     all lookupIndex (enumStrings tbl)
  && all indexLookup (enumIds tbl)

  where
    tbl :: StringTable Int
    tbl = construct strs

    lookupIndex str = index tbl ident == str
      where Just ident = lookup tbl str

    indexLookup ident = lookup tbl str == Just ident
      where str = index tbl ident

-- this is important so we can use Map.fromAscList
prop_sorted :: [BS.ByteString] -> Bool
prop_sorted strings =
    isSorted [ index' strs offsets ix
             | ix <- A.range (A.bounds ids) ]
  where
    _tbl :: StringTable Int
    _tbl@(StringTable strs offsets ids _ixs) = construct strings
    isSorted xs = and (zipWith (<) xs (tail xs))

prop_finalise_unfinalise :: [BS.ByteString] -> Bool
prop_finalise_unfinalise strs =
    builder == unfinalise (finalise builder)
  where
    builder :: StringTableBuilder Int
    builder = foldl' (\tbl s -> fst (insert s tbl)) empty strs

prop_serialise_deserialise :: [BS.ByteString] -> Bool
prop_serialise_deserialise strs =
    Just (strtable, BS.empty) == (deserialiseV2
                                  . toStrict . BS.toLazyByteString
                                  . serialise) strtable
  where
    strtable :: StringTable Int
    strtable = construct strs

prop_serialiseSize :: [BS.ByteString] -> Bool
prop_serialiseSize strs =
    (fromIntegral . LBS.length . BS.toLazyByteString . serialise) strtable
 == serialiseSize strtable
  where
    strtable :: StringTable Int
    strtable = construct strs

enumStrings :: Enum id => StringTable id -> [BS.ByteString]
enumStrings (StringTable bs offsets _ _) = map (index' bs offsets) [0..h-1]
  where (0,h) = A.bounds offsets

enumIds :: Enum id => StringTable id -> [id]
enumIds (StringTable _ offsets _ _) = [toEnum 0 .. toEnum (fromIntegral (h-1))]
  where (0,h) = A.bounds offsets

toStrict :: LBS.ByteString -> BS.ByteString
#if MIN_VERSION_bytestring(0,10,0)
toStrict = LBS.toStrict
#else
toStrict = BS.concat . LBS.toChunks
#endif

#endif

#if !(MIN_VERSION_base(4,5,0))
(<>) :: Monoid m => m -> m -> m
(<>) = mappend
#endif
-------------------------------------------------------------------------------- /Codec/Archive/Tar/Index/IntTrie.hs: --------------------------------------------------------------------------------
{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
{-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-}

module Codec.Archive.Tar.Index.IntTrie (

  IntTrie,
  construct,
  toList,

  IntTrieBuilder,
  empty,
  insert,
  finalise,
  unfinalise,

  lookup,
  TrieLookup(..),

  serialise,
  serialiseSize,
  deserialise,

#ifdef TESTS
  test1, test2, test3,
  ValidPaths(..),
  prop_lookup,
  prop_completions,
  prop_lookup_mono,
  prop_completions_mono,
  prop_construct_toList,
  prop_finalise_unfinalise,
  prop_serialise_deserialise,
  prop_serialiseSize,
#endif
 ) where

import Prelude hiding (lookup)

import Data.Typeable (Typeable)

import qualified Data.Array.Unboxed as A
import Data.Array.IArray  ((!))
import qualified Data.Bits as Bits
import Data.Word (Word32)
import Data.Bits
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import qualified Data.ByteString        as BS
import qualified Data.ByteString.Lazy   as LBS
import qualified Data.ByteString.Unsafe as BS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder      as BS
#else
import Data.ByteString.Lazy.Builder as BS
#endif
import Control.Exception (assert)
#if MIN_VERSION_containers(0,5,0)
import qualified Data.Map.Strict    as Map
import qualified Data.IntMap.Strict as IntMap
import Data.IntMap.Strict (IntMap)
#else
import qualified Data.Map    as Map
import qualified Data.IntMap as IntMap
import Data.IntMap (IntMap)
#endif

import Data.List hiding (lookup, insert)
import Data.Function (on)

#ifdef TESTS
import Test.QuickCheck
import Control.Applicative ((<$>), (<*>))
#endif


-- | A compact mapping from sequences of nats to nats.
--
-- NOTE: The tries in this module have values /only/ at the leaves (which
-- correspond to files), they do not have values at the branch points (which
-- correspond to directories).
newtype IntTrie k v = IntTrie (A.UArray Word32 Word32)
  deriving (Eq, Show, Typeable)


-- Compact, read-only implementation of a trie. It's intended for use with file
-- paths, but we do that via string ids.

#ifdef TESTS
-- Example mapping:
--
example0 :: [(FilePath, Int)]
example0 =
  [("foo-1.0/foo-1.0.cabal", 512)   -- tar block 1
  ,("foo-1.0/LICENSE",       2048)  -- tar block 4
  ,("foo-1.0/Data/Foo.hs",   4096)] -- tar block 8

-- After converting path components to integers this becomes:
--
example1 :: [([Word32], Word32)]
example1 =
  [([1,2],   512)
  ,([1,3],   2048)
  ,([1,4,5], 4096)]

-- As a trie this looks like:

--  [ (1, *) ]
--        |
--  [ (2, 512), (3, 1024), (4, *) ]
--                              |
--                        [ (5, 4096) ]

-- We use an intermediate trie representation

mktrie :: [(Int, TrieNode k v)] -> IntTrieBuilder k v
mkleaf :: (Enum k, Enum v) => k -> v -> (Int, TrieNode k v)
mknode :: Enum k => k -> IntTrieBuilder k v -> (Int, TrieNode k v)

mktrie = IntTrieBuilder . IntMap.fromList
mkleaf k v = (fromEnum k, TrieLeaf (enumToWord32 v))
mknode k t = (fromEnum k, TrieNode t)

example2 :: IntTrieBuilder Word32 Word32
example2 = mktrie [ mknode 1 t1 ]
  where
    t1 = mktrie [ mkleaf 2 512, mkleaf 3 2048, mknode 4 t2 ]
    t2 = mktrie [ mkleaf 5 4096 ]


example2' :: IntTrieBuilder Word32 Word32
example2' = mktrie [ mknode 0 t1 ]
  where
    t1 = mktrie [ mknode 3 t2 ]
    t2 = mktrie [ mknode 1 t3, mknode 2 t4 ]
    t3 = mktrie [ mkleaf 4 10608 ]
    t4 = mktrie [ mkleaf 4 10612 ]
{-
0: [1,N0,3]

3: [1,N3,6]

6: [2,N1,N2,11,12]

11: [1,4,10608]
14: [1,4,10612]
-}

example2'' :: IntTrieBuilder Word32 Word32
example2'' = mktrie [ mknode 1 t1, mknode 2 t2 ]
  where
    t1 = mktrie [ mkleaf 4 10608 ]
    t2 = mktrie [ mkleaf 4 10612 ]

example2''' :: IntTrieBuilder Word32 Word32
example2''' = mktrie [ mknode 0 t3 ]
  where
    t3  = mktrie [ mknode 4 t8, mknode 6 t11 ]
    t8  = mktrie [ mknode 1 t14 ]
    t11 = mktrie [ mkleaf 5 10605 ]
    t14 = mktrie [ mknode 2 t19, mknode 3 t22 ]
    t19 = mktrie [ mkleaf 7 10608 ]
    t22 = mktrie [ mkleaf 7 10612 ]
{-
 0: [1,N0,3]
 3: [2,N4,N6,8,11]
 8: [1,N1,11]
11: [1,5,10605]
14: [2,N2,N3,16,19]
19: [1,7,10608]
22: [1,7,10612]
-}

-- We convert from the 'Paths' to the 'IntTrieBuilder' using 'inserts':
--
test1 = example2 == inserts example1 empty
#endif

-- Each node has a size and a sequence of keys followed by an equal length
-- sequence of corresponding entries. Since we're going to flatten this into
-- a single array then we will need to replace the trie structure with pointers
-- represented as array offsets.

-- Each node is a pair of arrays, one of keys and one of Either value pointer.
-- We need to distinguish values from internal pointers. We use a tag bit:
--
tagLeaf, tagNode, untag :: Word32 -> Word32
tagLeaf = id
tagNode = flip Bits.setBit   31
untag   = flip Bits.clearBit 31

isNode :: Word32 -> Bool
isNode = flip Bits.testBit 31

-- So the overall array form of the above trie is:
--
-- offset:  0   1    2    3    4    5    6     7    8     9    10  11   12
-- array: [ 1 | N1 | 3 ][ 3 | 2,   3,   N4 | 512, 2048, 10 ][ 1 | 5 | 4096 ]
--                  \__/                 \___/

#ifdef TESTS
example3 :: [Word32]
example3 =
 [1, tagNode 1,
  3,
  3, tagLeaf 2, tagLeaf 3, tagNode 4,
  512, 2048, 10,
  1, tagLeaf 5,
  4096
 ]

-- We get the array form by using flattenTrie:

test2 = example3 == flattenTrie example2

example4 :: IntTrie Int Int
example4 = IntTrie (mkArray example3)

mkArray :: [Word32] -> A.UArray Word32 Word32
mkArray xs = A.listArray (0, fromIntegral (length xs) - 1) xs

test3 = case lookup example4 [1] of
  Just (Completions [(2,_),(3,_),(4,_)]) -> True
  _                                      -> False

test1, test2, test3 :: Bool
#endif

-------------------------------------
-- Decoding the trie array form
--

-- | Enumerate the immediate children of the trie node that lives at the
-- given array offset: node entries recurse, leaf entries carry a value.
completionsFrom :: (Enum k, Enum v) => IntTrie k v -> Word32 -> Completions k v
completionsFrom trie@(IntTrie arr) nodeOff =
    [ (word32ToEnum (untag key), next)
    | keyOff <- [keysStart..keysEnd]
    , let key   = arr ! keyOff
          -- the entry for a key sits nodeSize slots after the key itself
          entry = arr ! (keyOff + nodeSize)
          next | isNode key = Completions (completionsFrom trie entry)
               | otherwise  = Entry (word32ToEnum entry)
    ]
  where
    nodeSize  = arr ! nodeOff
    keysStart = nodeOff + 1
    keysEnd   = nodeOff + nodeSize

-- | Convert the trie to a list
--
-- This is the left inverse to 'construct' (modulo ordering).
toList :: forall k v. (Enum k, Enum v) => IntTrie k v -> [([k], v)]
toList = concatMap (aux []) . (`completionsFrom` 0)
  where
    aux :: [k] -> (k, TrieLookup k v) -> [([k], v)]
    aux ks (k, Entry v)        = [(reverse (k:ks), v)]
    aux ks (k, Completions cs) = concatMap (aux (k:ks)) cs

-------------------------------------
-- Toplevel trie array construction
--

-- So constructing the 'IntTrie' as a whole is just a matter of stringing
-- together all the bits

-- | Build an 'IntTrie' from a bunch of (key, value) pairs, where the keys
-- are sequences.
--
construct :: (Enum k, Enum v) => [([k], v)] -> IntTrie k v
construct = finalise . flip inserts empty


---------------------------------
-- Looking up in the trie array
--

data TrieLookup  k v = Entry !v | Completions (Completions k v) deriving Show
type Completions k v = [(k, TrieLookup k v)]

lookup :: forall k v.
(Enum k, Enum v) => IntTrie k v -> [k] -> Maybe (TrieLookup k v) 280 | lookup trie@(IntTrie arr) = go 0 281 | where 282 | go :: Word32 -> [k] -> Maybe (TrieLookup k v) 283 | go nodeOff [] = Just (completions nodeOff) 284 | go nodeOff (k:ks) = case search nodeOff (tagLeaf k') of 285 | Just entryOff 286 | | null ks -> Just (entry entryOff) 287 | | otherwise -> Nothing 288 | Nothing -> case search nodeOff (tagNode k') of 289 | Nothing -> Nothing 290 | Just entryOff -> go (arr ! entryOff) ks 291 | where 292 | k' = enumToWord32 k 293 | 294 | entry entryOff = Entry (word32ToEnum (arr ! entryOff)) 295 | completions nodeOff = Completions (completionsFrom trie nodeOff) 296 | 297 | search :: Word32 -> Word32 -> Maybe Word32 298 | search nodeOff key = fmap (+nodeSize) (bsearch keysStart keysEnd key) 299 | where 300 | nodeSize = arr ! nodeOff 301 | keysStart = nodeOff + 1 302 | keysEnd = nodeOff + nodeSize 303 | 304 | bsearch :: Word32 -> Word32 -> Word32 -> Maybe Word32 305 | bsearch a b key 306 | | a > b = Nothing 307 | | otherwise = case compare key (arr ! mid) of 308 | LT -> bsearch a (mid-1) key 309 | EQ -> Just mid 310 | GT -> bsearch (mid+1) b key 311 | where mid = (a + b) `div` 2 312 | 313 | 314 | enumToWord32 :: Enum n => n -> Word32 315 | enumToWord32 = fromIntegral . fromEnum 316 | 317 | word32ToEnum :: Enum n => Word32 -> n 318 | word32ToEnum = toEnum . 
fromIntegral 319 | 320 | 321 | ------------------------- 322 | -- Building Tries 323 | -- 324 | 325 | newtype IntTrieBuilder k v = IntTrieBuilder (IntMap (TrieNode k v)) 326 | deriving (Show, Eq) 327 | 328 | data TrieNode k v = TrieLeaf {-# UNPACK #-} !Word32 329 | | TrieNode !(IntTrieBuilder k v) 330 | deriving (Show, Eq) 331 | 332 | empty :: IntTrieBuilder k v 333 | empty = IntTrieBuilder IntMap.empty 334 | 335 | insert :: (Enum k, Enum v) => [k] -> v 336 | -> IntTrieBuilder k v -> IntTrieBuilder k v 337 | insert [] _v t = t 338 | insert (k:ks) v t = insertTrie (fromEnum k) (map fromEnum ks) (enumToWord32 v) t 339 | 340 | insertTrie :: Int -> [Int] -> Word32 341 | -> IntTrieBuilder k v -> IntTrieBuilder k v 342 | insertTrie k ks v (IntTrieBuilder t) = 343 | IntTrieBuilder $ 344 | IntMap.alter (\t' -> Just $! maybe (freshTrieNode ks v) 345 | (insertTrieNode ks v) t') 346 | k t 347 | 348 | insertTrieNode :: [Int] -> Word32 -> TrieNode k v -> TrieNode k v 349 | insertTrieNode [] v _ = TrieLeaf v 350 | insertTrieNode (k:ks) v (TrieLeaf _) = TrieNode (freshTrie k ks v) 351 | insertTrieNode (k:ks) v (TrieNode t) = TrieNode (insertTrie k ks v t) 352 | 353 | freshTrie :: Int -> [Int] -> Word32 -> IntTrieBuilder k v 354 | freshTrie k [] v = 355 | IntTrieBuilder (IntMap.singleton k (TrieLeaf v)) 356 | freshTrie k (k':ks) v = 357 | IntTrieBuilder (IntMap.singleton k (TrieNode (freshTrie k' ks v))) 358 | 359 | freshTrieNode :: [Int] -> Word32 -> TrieNode k v 360 | freshTrieNode [] v = TrieLeaf v 361 | freshTrieNode (k:ks) v = TrieNode (freshTrie k ks v) 362 | 363 | inserts :: (Enum k, Enum v) => [([k], v)] 364 | -> IntTrieBuilder k v -> IntTrieBuilder k v 365 | inserts kvs t = foldl' (\t' (ks, v) -> insert ks v t') t kvs 366 | 367 | finalise :: IntTrieBuilder k v -> IntTrie k v 368 | finalise trie = 369 | IntTrie $ 370 | A.listArray (0, fromIntegral (flatTrieLength trie) - 1) 371 | (flattenTrie trie) 372 | 373 | unfinalise :: (Enum k, Enum v) => IntTrie k v -> IntTrieBuilder 
k v 374 | unfinalise trie = 375 | go (completionsFrom trie 0) 376 | where 377 | go kns = 378 | IntTrieBuilder $ 379 | IntMap.fromList 380 | [ (fromEnum k, t) 381 | | (k, n) <- kns 382 | , let t = case n of 383 | Entry v -> TrieLeaf (enumToWord32 v) 384 | Completions kns' -> TrieNode (go kns') 385 | ] 386 | 387 | --------------------------------- 388 | -- Flattening Tries 389 | -- 390 | 391 | type Offset = Int 392 | 393 | flatTrieLength :: IntTrieBuilder k v -> Int 394 | flatTrieLength (IntTrieBuilder tns) = 395 | 1 396 | + 2 * IntMap.size tns 397 | + sum [ flatTrieLength n | TrieNode n <- IntMap.elems tns ] 398 | 399 | -- This is a breadth-first traversal. We keep a list of the tries that we are 400 | -- to write out next. Each of these have an offset allocated to them at the 401 | -- time we put them into the list. We keep a running offset so we know where 402 | -- to allocate next. 403 | -- 404 | flattenTrie :: IntTrieBuilder k v -> [Word32] 405 | flattenTrie trie = go (queue [trie]) (size trie) 406 | where 407 | size (IntTrieBuilder tns) = 1 + 2 * IntMap.size tns 408 | 409 | go :: Q (IntTrieBuilder k v) -> Offset -> [Word32] 410 | go todo !offset = 411 | case dequeue todo of 412 | Nothing -> [] 413 | Just (IntTrieBuilder tnodes, tries) -> 414 | flat ++ go tries' offset' 415 | where 416 | !count = IntMap.size tnodes 417 | flat = fromIntegral count 418 | : Map.keys keysValues 419 | ++ Map.elems keysValues 420 | (!offset', !keysValues, !tries') = 421 | #if MIN_VERSION_containers(0,4,2) 422 | IntMap.foldlWithKey' accumNodes 423 | (offset, Map.empty, tries) 424 | tnodes 425 | #else 426 | foldl' (\a (k,v) -> accumNodes a k v) 427 | (offset, Map.empty, tries) 428 | (IntMap.toList tnodes) 429 | #endif 430 | 431 | accumNodes :: (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v)) 432 | -> Int -> TrieNode k v 433 | -> (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v)) 434 | accumNodes (!off, !kvs, !tries) !k (TrieLeaf v) = 435 | (off, kvs', tries) 436 | where 437 
| kvs' = Map.insert (tagLeaf (int2Word32 k)) v kvs 438 | 439 | accumNodes (!off, !kvs, !tries) !k (TrieNode t) = 440 | (off + size t, kvs', tries') 441 | where 442 | kvs' = Map.insert (tagNode (int2Word32 k)) (int2Word32 off) kvs 443 | tries' = enqueue tries t 444 | 445 | data Q a = Q [a] [a] 446 | 447 | queue :: [a] -> Q a 448 | queue xs = Q xs [] 449 | 450 | enqueue :: Q a -> a -> Q a 451 | enqueue (Q front back) x = Q front (x : back) 452 | 453 | dequeue :: Q a -> Maybe (a, Q a) 454 | dequeue (Q (x:xs) back) = Just (x, Q xs back) 455 | dequeue (Q [] back) = case reverse back of 456 | x:xs -> Just (x, Q xs []) 457 | [] -> Nothing 458 | 459 | int2Word32 :: Int -> Word32 460 | int2Word32 = fromIntegral 461 | 462 | 463 | ------------------------- 464 | -- (de)serialisation 465 | -- 466 | 467 | serialise :: IntTrie k v -> BS.Builder 468 | serialise (IntTrie arr) = 469 | let (_, !ixEnd) = A.bounds arr in 470 | BS.word32BE (ixEnd+1) 471 | <> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems arr) 472 | 473 | serialiseSize :: IntTrie k v -> Int 474 | serialiseSize (IntTrie arr) = 475 | let (_, ixEnd) = A.bounds arr in 476 | 4 477 | + 4 * (fromIntegral ixEnd + 1) 478 | 479 | deserialise :: BS.ByteString -> Maybe (IntTrie k v, BS.ByteString) 480 | deserialise bs 481 | | BS.length bs >= 4 482 | , let lenArr = readWord32BE bs 0 483 | lenTotal = 4 + 4 * fromIntegral lenArr 484 | , BS.length bs >= 4 + 4 * fromIntegral lenArr 485 | , let !arr = A.array (0, lenArr-1) 486 | [ (i, readWord32BE bs off) 487 | | (i, off) <- zip [0..lenArr-1] [4,8 .. 
lenTotal - 4] ] 488 | !bs' = BS.drop lenTotal bs 489 | = Just (IntTrie arr, bs') 490 | 491 | | otherwise 492 | = Nothing 493 | 494 | readWord32BE :: BS.ByteString -> Int -> Word32 495 | readWord32BE bs i = 496 | assert (i >= 0 && i+3 <= BS.length bs - 1) $ 497 | fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24 498 | + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16 499 | + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8 500 | + fromIntegral (BS.unsafeIndex bs (i + 3)) 501 | 502 | 503 | ------------------------- 504 | -- Correctness property 505 | -- 506 | 507 | #ifdef TESTS 508 | 509 | prop_lookup :: (Ord k, Enum k, Eq v, Enum v, Show k, Show v) 510 | => [([k], v)] -> Bool 511 | prop_lookup paths = 512 | flip all paths $ \(key, value) -> 513 | case lookup trie key of 514 | Just (Entry value') | value' == value -> True 515 | Just (Entry value') -> error $ "IntTrie: " ++ show (key, value, value') 516 | Nothing -> error $ "IntTrie: didn't find " ++ show key 517 | Just (Completions xs) -> error $ "IntTrie: " ++ show xs 518 | 519 | where 520 | trie = construct paths 521 | 522 | prop_completions :: forall k v. 
(Ord k, Enum k, Eq v, Enum v) => [([k], v)] -> Bool 523 | prop_completions paths = 524 | inserts paths empty 525 | == convertCompletions (completionsFrom (construct paths) 0) 526 | where 527 | convertCompletions :: Ord k => Completions k v -> IntTrieBuilder k v 528 | convertCompletions kls = 529 | IntTrieBuilder $ 530 | IntMap.fromList 531 | [ case l of 532 | Entry v -> mkleaf k v 533 | Completions kls' -> mknode k (convertCompletions kls') 534 | | (k, l) <- sortBy (compare `on` fst) kls ] 535 | 536 | 537 | prop_lookup_mono :: ValidPaths -> Bool 538 | prop_lookup_mono (ValidPaths paths) = prop_lookup paths 539 | 540 | prop_completions_mono :: ValidPaths -> Bool 541 | prop_completions_mono (ValidPaths paths) = prop_completions paths 542 | 543 | prop_construct_toList :: ValidPaths -> Bool 544 | prop_construct_toList (ValidPaths paths) = 545 | sortBy (compare `on` fst) (toList (construct paths)) 546 | == sortBy (compare `on` fst) paths 547 | 548 | prop_finalise_unfinalise :: ValidPaths -> Bool 549 | prop_finalise_unfinalise (ValidPaths paths) = 550 | builder == unfinalise (finalise builder) 551 | where 552 | builder :: IntTrieBuilder Char Char 553 | builder = inserts paths empty 554 | 555 | prop_serialise_deserialise :: ValidPaths -> Bool 556 | prop_serialise_deserialise (ValidPaths paths) = 557 | Just (trie, BS.empty) == (deserialise 558 | . toStrict . BS.toLazyByteString 559 | . serialise) trie 560 | where 561 | trie :: IntTrie Char Char 562 | trie = construct paths 563 | 564 | prop_serialiseSize :: ValidPaths -> Bool 565 | prop_serialiseSize (ValidPaths paths) = 566 | (fromIntegral . LBS.length . BS.toLazyByteString . serialise) trie 567 | == serialiseSize trie 568 | where 569 | trie :: IntTrie Char Char 570 | trie = construct paths 571 | 572 | newtype ValidPaths = ValidPaths [([Char], Char)] deriving Show 573 | 574 | instance Arbitrary ValidPaths where 575 | arbitrary = 576 | ValidPaths . 
makeNoPrefix <$> listOf ((,) <$> listOf1 arbitrary <*> arbitrary) 577 | where 578 | makeNoPrefix [] = [] 579 | makeNoPrefix ((k,v):kvs) 580 | | all (\(k', _) -> not (isPrefixOfOther k k')) kvs 581 | = (k,v) : makeNoPrefix kvs 582 | | otherwise = makeNoPrefix kvs 583 | 584 | shrink (ValidPaths kvs) = 585 | map ValidPaths . filter noPrefix . filter nonEmpty . shrink $ kvs 586 | where 587 | noPrefix [] = True 588 | noPrefix ((k,_):kvs') = all (\(k', _) -> not (isPrefixOfOther k k')) kvs' 589 | && noPrefix kvs' 590 | nonEmpty = all (not . null . fst) 591 | 592 | isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a 593 | 594 | toStrict :: LBS.ByteString -> BS.ByteString 595 | #if MIN_VERSION_bytestring(0,10,0) 596 | toStrict = LBS.toStrict 597 | #else 598 | toStrict = BS.concat . LBS.toChunks 599 | #endif 600 | 601 | #endif 602 | 603 | #if !(MIN_VERSION_base(4,5,0)) 604 | (<>) :: Monoid m => m -> m -> m 605 | (<>) = mappend 606 | #endif 607 | 608 | -------------------------------------------------------------------------------- /Codec/Archive/Tar/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP, GeneralizedNewtypeDeriving, BangPatterns #-} 2 | ----------------------------------------------------------------------------- 3 | -- | 4 | -- Module : Codec.Archive.Tar.Types 5 | -- Copyright : (c) 2007 Bjorn Bringert, 6 | -- 2008 Andrea Vezzosi, 7 | -- 2008-2009 Duncan Coutts 8 | -- 2011 Max Bolingbroke 9 | -- License : BSD3 10 | -- 11 | -- Maintainer : duncan@community.haskell.org 12 | -- Portability : portable 13 | -- 14 | -- Types to represent the content of @.tar@ archives. 
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar.Types (

  Entry(..),
  entryPath,
  EntryContent(..),
  FileSize,
  Permissions,
  Ownership(..),
  EpochTime,
  TypeCode,
  DevMajor,
  DevMinor,
  Format(..),

  simpleEntry,
  fileEntry,
  directoryEntry,

  ordinaryFilePermissions,
  executableFilePermissions,
  directoryPermissions,

  TarPath(..),
  toTarPath,
  fromTarPath,
  fromTarPathToPosixPath,
  fromTarPathToWindowsPath,

  LinkTarget(..),
  toLinkTarget,
  fromLinkTarget,
  fromLinkTargetToPosixPath,
  fromLinkTargetToWindowsPath,

  Entries(..),
  mapEntries,
  mapEntriesNoFail,
  foldEntries,
  foldlEntries,
  unfoldEntries,

#ifdef TESTS
  limitToV7FormatCompat
#endif
  ) where

import Data.Int      (Int64)
import Data.Monoid   (Monoid(..))
import qualified Data.ByteString       as BS
import qualified Data.ByteString.Char8 as BS.Char8
import qualified Data.ByteString.Lazy  as LBS
import Control.DeepSeq

import qualified System.FilePath as FilePath.Native
         ( joinPath, splitDirectories, addTrailingPathSeparator )
import qualified System.FilePath.Posix as FilePath.Posix
         ( joinPath, splitPath, splitDirectories, hasTrailingPathSeparator
         , addTrailingPathSeparator )
import qualified System.FilePath.Windows as FilePath.Windows
         ( joinPath, addTrailingPathSeparator )
import System.Posix.Types
         ( FileMode )

#ifdef TESTS
import Test.QuickCheck
import Control.Applicative ((<$>), pure, (<*>))
#endif


-- | File size in bytes.
type FileSize  = Int64
-- | The number of seconds since the UNIX epoch
type EpochTime = Int64
-- | Major number of a character/block device.
type DevMajor  = Int
-- | Minor number of a character/block device.
type DevMinor  = Int
-- | The type-code character stored in a tar header for non-standard entries.
type TypeCode  = Char
-- | Unix-style file mode bits.
type Permissions = FileMode

-- | Tar archive entry.
--
data Entry = Entry {

    -- | The path of the file or directory within the archive. This is in a
    -- tar-specific form. Use 'entryPath' to get a native 'FilePath'.
    entryTarPath :: {-# UNPACK #-} !TarPath,

    -- | The real content of the entry. For 'NormalFile' this includes the
    -- file data. An entry usually contains a 'NormalFile' or a 'Directory'.
    entryContent :: !EntryContent,

    -- | File permissions (Unix style file mode).
    entryPermissions :: {-# UNPACK #-} !Permissions,

    -- | The user and group to which this file belongs.
    entryOwnership :: {-# UNPACK #-} !Ownership,

    -- | The time the file was last modified.
    entryTime :: {-# UNPACK #-} !EpochTime,

    -- | The tar format the archive is using.
    entryFormat :: !Format
  }
  deriving (Eq, Show)

-- | Native 'FilePath' of the file or directory within the archive.
--
entryPath :: Entry -> FilePath
entryPath = fromTarPath . entryTarPath

-- | The content of a tar archive entry, which depends on the type of entry.
--
-- Portable archives should contain only 'NormalFile' and 'Directory'.
--
data EntryContent = NormalFile      LBS.ByteString {-# UNPACK #-} !FileSize
                  | Directory
                  | SymbolicLink    !LinkTarget
                  | HardLink        !LinkTarget
                  | CharacterDevice {-# UNPACK #-} !DevMajor
                                    {-# UNPACK #-} !DevMinor
                  | BlockDevice     {-# UNPACK #-} !DevMajor
                                    {-# UNPACK #-} !DevMinor
                  | NamedPipe
                  | OtherEntryType  {-# UNPACK #-} !TypeCode LBS.ByteString
                                    {-# UNPACK #-} !FileSize
  deriving (Eq, Ord, Show)

data Ownership = Ownership {
    -- | The owner user name. Should be set to @\"\"@ if unknown.
    ownerName :: String,

    -- | The owner group name. Should be set to @\"\"@ if unknown.
    groupName :: String,

    -- | Numeric owner user id. Should be set to @0@ if unknown.
    ownerId :: {-# UNPACK #-} !Int,

    -- | Numeric owner group id. Should be set to @0@ if unknown.
    groupId :: {-# UNPACK #-} !Int
  }
  deriving (Eq, Ord, Show)

-- | There have been a number of extensions to the tar file format over the
-- years. They all share the basic entry fields and put more meta-data in
-- different extended headers.
--
data Format =

     -- | This is the classic Unix V7 tar format. It does not support owner and
     -- group names, just numeric Ids. It also does not support device numbers.
     V7Format

     -- | The \"USTAR\" format is an extension of the classic V7 format. It was
     -- later standardised by POSIX. It has some restrictions but is the most
     -- portable format.
     --
   | UstarFormat

     -- | The GNU tar implementation also extends the classic V7 format, though
     -- in a slightly different way from the USTAR format. In general for new
     -- archives the standard USTAR/POSIX should be used.
     --
   | GnuFormat
  deriving (Eq, Ord, Show)

instance NFData Entry where
  -- all other fields are strict (and unpacked) by construction;
  -- only the content holds a lazy bytestring
  rnf (Entry _ c _ _ _ _) = rnf c

instance NFData EntryContent where
  rnf x = case x of
      NormalFile       c _ -> rnflbs c
      OtherEntryType _ c _ -> rnflbs c
      _                    -> seq x ()
    where
#if MIN_VERSION_bytestring(0,10,0)
      rnflbs = rnf
#else
      -- older bytestring lacks an NFData instance; forcing the chunk spine
      -- suffices since the chunks themselves are strict
      rnflbs = foldr (\ !_bs r -> r) () . LBS.toChunks
#endif

instance NFData Ownership where
  rnf (Ownership o g _ _) = rnf o `seq` rnf g

-- | @rw-r--r--@ for normal files
ordinaryFilePermissions :: Permissions
ordinaryFilePermissions = 0o0644

-- | @rwxr-xr-x@ for executable files
executableFilePermissions :: Permissions
executableFilePermissions = 0o0755

-- | @rwxr-xr-x@ for directories
directoryPermissions :: Permissions
directoryPermissions = 0o0755

-- | An 'Entry' with all default values except for the file name and type. It
-- uses the portable USTAR/POSIX format (see 'UstarFormat').
--
-- You can use this as a basis and override specific fields, eg:
--
-- > (simpleEntry name (HardLink target)) { entryOwnership = owner }
--
simpleEntry :: TarPath -> EntryContent -> Entry
simpleEntry tarpath content = Entry {
    entryTarPath     = tarpath,
    entryContent     = content,
    entryPermissions = case content of
                         Directory -> directoryPermissions
                         _         -> ordinaryFilePermissions,
    entryOwnership   = Ownership "" "" 0 0,
    entryTime        = 0,
    entryFormat      = UstarFormat
  }

-- | A tar 'Entry' for a file.
--
-- Entry fields such as file permissions and ownership have default values.
--
-- You can use this as a basis and override specific fields. For example if you
-- need an executable file you could use:
--
-- > (fileEntry name content) { entryPermissions = executableFilePermissions }
--
fileEntry :: TarPath -> LBS.ByteString -> Entry
fileEntry name fileContent =
  simpleEntry name (NormalFile fileContent (LBS.length fileContent))

-- | A tar 'Entry' for a directory.
--
-- Entry fields such as file permissions and ownership have default values.
--
directoryEntry :: TarPath -> Entry
directoryEntry name = simpleEntry name Directory

--
-- * Tar paths
--

-- | The classic tar format allowed just 100 characters for the file name. The
-- USTAR format extended this with an extra 155 characters, however it uses a
-- complex method of splitting the name between the two sections.
--
-- Instead of just putting any overflow into the extended area, it uses the
-- extended area as a prefix. The aggravating insane bit however is that the
-- prefix (if any) must only contain a directory prefix. That is the split
-- between the two areas must be on a directory separator boundary. So there is
-- no simple calculation to work out if a file name is too long. Instead we
-- have to try to find a valid split that makes the name fit in the two areas.
--
-- The rationale presumably was to make it a bit more compatible with old tar
-- programs that only understand the classic format. A classic tar would be
-- able to extract the file name and possibly some dir prefix, but not the
-- full dir prefix. So the files would end up in the wrong place, but that's
-- probably better than ending up with the wrong names too.
--
-- So it's understandable but rather annoying.
--
-- * Tar paths use Posix format (ie @\'/\'@ directory separators), irrespective
--   of the local path conventions.
--
-- * The directory separator between the prefix and name is /not/ stored.
--
data TarPath = TarPath {-# UNPACK #-} !BS.ByteString -- path name, 100 characters max.
                       {-# UNPACK #-} !BS.ByteString -- path prefix, 155 characters max.
  deriving (Eq, Ord)

instance NFData TarPath where
  rnf (TarPath _ _) = () -- fully strict by construction

instance Show TarPath where
  show = show . fromTarPath

-- | Convert a 'TarPath' to a native 'FilePath'.
--
-- The native 'FilePath' will use the native directory separator but it is not
-- otherwise checked for validity or sanity. In particular:
--
-- * The tar path may be invalid as a native path, eg the file name @\"nul\"@
--   is not valid on Windows.
--
-- * The tar path may be an absolute path or may contain @\"..\"@ components.
--   For security reasons this should not usually be allowed, but it is your
--   responsibility to check for these conditions (eg using 'checkSecurity').
--
fromTarPath :: TarPath -> FilePath
fromTarPath (TarPath namebs prefixbs) = adjustDirectory $
    FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix
                            ++ FilePath.Posix.splitDirectories name
  where
    name   = BS.Char8.unpack namebs
    prefix = BS.Char8.unpack prefixbs
    -- a trailing '/' on the name marks a directory entry; re-add it natively
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
                    = FilePath.Native.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a 'TarPath' to a Unix\/Posix 'FilePath'.
--
-- The difference compared to 'fromTarPath' is that it always returns a Unix
-- style path irrespective of the current operating system.
--
-- This is useful to check how a 'TarPath' would be interpreted on a specific
-- operating system, eg to perform portability checks.
--
fromTarPathToPosixPath :: TarPath -> FilePath
fromTarPathToPosixPath (TarPath namebs prefixbs) = adjustDirectory $
    FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories prefix
                           ++ FilePath.Posix.splitDirectories name
  where
    name   = BS.Char8.unpack namebs
    prefix = BS.Char8.unpack prefixbs
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
                    = FilePath.Posix.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a 'TarPath' to a Windows 'FilePath'.
--
-- The only difference compared to 'fromTarPath' is that it always returns a
-- Windows style path irrespective of the current operating system.
--
-- This is useful to check how a 'TarPath' would be interpreted on a specific
-- operating system, eg to perform portability checks.
--
fromTarPathToWindowsPath :: TarPath -> FilePath
fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $
    FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories prefix
                             ++ FilePath.Posix.splitDirectories name
  where
    name   = BS.Char8.unpack namebs
    prefix = BS.Char8.unpack prefixbs
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
                    = FilePath.Windows.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a native 'FilePath' to a 'TarPath'.
--
-- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a
-- description of the problem with splitting long 'FilePath's.
--
toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for
                  -- directories a 'TarPath' must always use a trailing @\/@.
          -> FilePath -> Either String TarPath
toTarPath isDir = splitLongPath
                . addTrailingSep
                . FilePath.Posix.joinPath
                . FilePath.Native.splitDirectories
  where
    addTrailingSep | isDir     = FilePath.Posix.addTrailingPathSeparator
                   | otherwise = id

-- | Take a sanitised path, split on directory separators and try to pack it
-- into the 155 + 100 tar file name format.
--
-- The strategy is this: take the name-directory components in reverse order
-- and try to fit as many components into the 100 long name area as possible.
-- If all the remaining components fit in the 155 name area then we win.
--
splitLongPath :: FilePath -> Either String TarPath
splitLongPath path =
    case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
      Left err                 -> Left err
      Right (name, [])         -> Right $! TarPath (BS.Char8.pack name)
                                                   BS.empty
      Right (name, first:rest) -> case packName prefixMax remainder of
        Left err               -> Left err
        -- something still left over: no valid split exists
        Right (_     , (_:_)) -> Left "File name too long (cannot split)"
        Right (prefix, [])    -> Right $! TarPath (BS.Char8.pack name)
                                                  (BS.Char8.pack prefix)
        where
          -- drop the '/' between the name and prefix:
          remainder = init first : rest

  where
    nameMax, prefixMax :: Int
    nameMax   = 100
    prefixMax = 155

    -- greedily pack components (given in reverse order) into an area of
    -- maxLen characters; returns the packed path and the leftover components
    packName _      []     = Left "File name empty"
    packName maxLen (c:cs)
      | n > maxLen         = Left "File name too long"
      | otherwise          = Right (packName' maxLen n [c] cs)
      where n = length c

    packName' maxLen n ok (c:cs)
      | n' <= maxLen             = packName' maxLen n' (c:ok) cs
                                     where n' = n + length c
    packName' _      _ ok    cs  = (FilePath.Posix.joinPath ok, cs)

-- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and
-- 'HardLink' entry types.
--
newtype LinkTarget = LinkTarget BS.ByteString
  deriving (Eq, Ord, Show)

instance NFData LinkTarget where
#if MIN_VERSION_bytestring(0,10,0)
  rnf (LinkTarget bs) = rnf bs
#else
  rnf (LinkTarget !_bs) = ()
#endif

-- | Convert a native 'FilePath' to a tar 'LinkTarget'. This may fail if the
-- string is longer than 100 characters or if it contains non-portable
-- characters.
--
toLinkTarget :: FilePath -> Maybe LinkTarget
toLinkTarget path | length path <= 100 = Just $! LinkTarget (BS.Char8.pack path)
                  | otherwise          = Nothing

-- | Convert a tar 'LinkTarget' to a native 'FilePath'.
--
fromLinkTarget :: LinkTarget -> FilePath
fromLinkTarget (LinkTarget pathbs) = adjustDirectory $
    FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path
  where
    path = BS.Char8.unpack pathbs
    -- preserve a trailing separator, converted to the native convention
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
                    = FilePath.Native.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a tar 'LinkTarget' to a Unix/Posix 'FilePath'.
--
fromLinkTargetToPosixPath :: LinkTarget -> FilePath
fromLinkTargetToPosixPath (LinkTarget pathbs) = adjustDirectory $
    FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories path
  where
    path = BS.Char8.unpack pathbs
    -- Bug fix: this previously used FilePath.Native.addTrailingPathSeparator,
    -- which on Windows appends '\\' to what must be a Posix-style result.
    -- The sibling fromTarPathToPosixPath correctly uses the Posix variant.
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
                    = FilePath.Posix.addTrailingPathSeparator
                    | otherwise = id

-- | Convert a tar 'LinkTarget' to a Windows 'FilePath'.
--
fromLinkTargetToWindowsPath :: LinkTarget -> FilePath
fromLinkTargetToWindowsPath (LinkTarget pathbs) = adjustDirectory $
    FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories path
  where
    path = BS.Char8.unpack pathbs
    adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
                    = FilePath.Windows.addTrailingPathSeparator
                    | otherwise = id

--
-- * Entries type
--

-- | A tar archive is a sequence of entries.
--
-- The point of this type as opposed to just using a list is that it makes the
-- failure case explicit. We need this because the sequence of entries we get
-- from reading a tarball can include errors.
--
-- It is a concrete data type so you can manipulate it directly but it is often
-- clearer to use the provided functions for mapping, folding and unfolding.
--
-- Converting from a list can be done with just @foldr Next Done@.
-- Converting back into a list can be done with 'foldEntries' however in that
-- case you must be prepared to handle the 'Fail' case inherent in the
-- 'Entries' type.
--
-- The 'Monoid' instance lets you concatenate archives or append entries to an
-- archive.
--
data Entries e = Next Entry (Entries e)
               | Done
               | Fail e
  deriving (Eq, Show)

infixr 5 `Next`

-- | This is like the standard 'unfoldr' function on lists, but for 'Entries'.
-- It includes failure as an extra possibility that the stepper function may
-- return.
--
-- It can be used to generate 'Entries' from some other type. For example it is
-- used internally to lazily unfold entries from a 'LBS.ByteString'.
--
unfoldEntries :: (a -> Either e (Maybe (Entry, a))) -> a -> Entries e
unfoldEntries step = go
  where
    go seed =
      case step seed of
        Left err                -> Fail err
        Right Nothing           -> Done
        Right (Just (e, seed')) -> Next e (go seed')

-- | This is like the standard 'foldr' function on lists, but for 'Entries'.
-- Compared to 'foldr' it takes an extra function to account for the
-- possibility of failure.
--
-- This is used to consume a sequence of entries. For example it could be used
-- to scan a tarball for problems or to collect an index of the contents.
--
foldEntries :: (Entry -> a -> a) -> a -> (e -> a) -> Entries e -> a
foldEntries onEntry onDone onFail = go
  where
    go (Next e es) = onEntry e (go es)
    go Done        = onDone
    go (Fail err)  = onFail err

-- | A 'foldl'-like function on Entries. It either returns the final
-- accumulator result, or the failure along with the intermediate accumulator
-- value.
--
foldlEntries :: (a -> Entry -> a) -> a -> Entries e -> Either (e, a) a
foldlEntries f z = go z
  where
    -- strict accumulator to avoid a thunk chain over long archives
    go !acc (Next e es) = go (f acc e) es
    go !acc  Done       = Right acc
    go !acc (Fail err)  = Left (err, acc)

-- | This is like the standard 'map' function on lists, but for 'Entries'. It
-- includes failure as an extra possible outcome of the mapping function.
--
-- If your mapping function cannot fail it may be more convenient to use
-- 'mapEntriesNoFail'
mapEntries :: (Entry -> Either e' Entry) -> Entries e -> Entries (Either e e')
mapEntries f =
  foldEntries (\entry rest -> either (Fail . Right) (flip Next rest) (f entry))
              Done
              (Fail . Left)

-- | Like 'mapEntries' but the mapping function itself cannot fail.
--
mapEntriesNoFail :: (Entry -> Entry) -> Entries e -> Entries e
mapEntriesNoFail f =
  foldEntries (\entry -> Next (f entry)) Done Fail

#if MIN_VERSION_base(4,11,0)
-- BUG FIX: since base-4.11 (GHC 8.4) 'Semigroup' is a superclass of
-- 'Monoid', so the 'Monoid' instance below no longer compiles without a
-- 'Semigroup' instance. 'Semigroup' is in the Prelude from base-4.11, so
-- no extra import is needed inside this guard.
instance Semigroup (Entries e) where
  a <> b = foldEntries Next b Fail a
#endif

instance Monoid (Entries e) where
  mempty  = Done
#if !(MIN_VERSION_base(4,11,0))
  -- On older base, define 'mappend' directly; on newer base it defaults to
  -- '(<>)' from the Semigroup instance above.
  mappend a b = foldEntries Next b Fail a
#endif

instance Functor Entries where
  fmap f = foldEntries Next Done (Fail . f)
f) 544 | 545 | instance NFData e => NFData (Entries e) where 546 | rnf (Next e es) = rnf e `seq` rnf es 547 | rnf Done = () 548 | rnf (Fail e) = rnf e 549 | 550 | 551 | ------------------------- 552 | -- QuickCheck instances 553 | -- 554 | 555 | #ifdef TESTS 556 | 557 | instance Arbitrary Entry where 558 | arbitrary = Entry <$> arbitrary <*> arbitrary <*> arbitraryPermissions 559 | <*> arbitrary <*> arbitraryEpochTime <*> arbitrary 560 | where 561 | arbitraryPermissions :: Gen Permissions 562 | arbitraryPermissions = fromIntegral <$> (arbitraryOctal 7 :: Gen Int) 563 | 564 | arbitraryEpochTime :: Gen EpochTime 565 | arbitraryEpochTime = fromIntegral <$> (arbitraryOctal 11 :: Gen Int) 566 | 567 | shrink (Entry path content perms author time format) = 568 | [ Entry path' content' perms author' time' format 569 | | (path', content', author', time') <- 570 | shrink (path, content, author, time) ] 571 | ++ [ Entry path content perms' author time format 572 | | perms' <- shrinkIntegral perms ] 573 | 574 | instance Arbitrary TarPath where 575 | arbitrary = either error id 576 | . toTarPath False 577 | . FilePath.Posix.joinPath 578 | <$> listOf1ToN (255 `div` 5) 579 | (elements (map (replicate 4) "abcd")) 580 | 581 | shrink = map (either error id . toTarPath False) 582 | . map FilePath.Posix.joinPath 583 | . filter (not . null) 584 | . shrinkList shrinkNothing 585 | . FilePath.Posix.splitPath 586 | . fromTarPathToPosixPath 587 | 588 | instance Arbitrary LinkTarget where 589 | arbitrary = maybe (error "link target too large") id 590 | . toLinkTarget 591 | . FilePath.Native.joinPath 592 | <$> listOf1ToN (100 `div` 5) 593 | (elements (map (replicate 4) "abcd")) 594 | 595 | shrink = map (maybe (error "link target too large") id . toLinkTarget) 596 | . map FilePath.Posix.joinPath 597 | . filter (not . null) 598 | . shrinkList shrinkNothing 599 | . FilePath.Posix.splitPath 600 | . 
-- | Generate a list of between 1 and @limit@ elements, also capped by the
-- current QuickCheck size parameter.
listOf1ToN :: Int -> Gen a -> Gen [a]
listOf1ToN limit gen = sized $ \sz -> do
    len <- choose (1, min limit (max 1 sz))
    vectorOf len gen

-- | Generate a list of between 0 and @limit@ elements, also capped by the
-- current QuickCheck size parameter.
listOf0ToN :: Int -> Gen a -> Gen [a]
listOf0ToN limit gen = sized $ \sz -> do
    len <- choose (0, min limit sz)
    vectorOf len gen

instance Arbitrary EntryContent where
  -- Biased heavily towards ordinary files; device numbers fit in 7 octal
  -- digits as they would on disk.
  arbitrary =
    frequency
      [ (16, do bs <- arbitrary
                return (NormalFile bs (LBS.length bs)))
      , (2,  pure Directory)
      , (1,  SymbolicLink    <$> arbitrary)
      , (1,  HardLink        <$> arbitrary)
      , (1,  CharacterDevice <$> arbitraryOctal 7 <*> arbitraryOctal 7)
      , (1,  BlockDevice     <$> arbitraryOctal 7 <*> arbitraryOctal 7)
      , (1,  pure NamedPipe)
      , (1,  do c  <- elements (['A'..'Z']++['a'..'z'])
                bs <- arbitrary
                return (OtherEntryType c bs (LBS.length bs)))
      ]

  -- Shrinking keeps the stored length consistent with the shrunk content.
  shrink (NormalFile bs _)       = [ NormalFile bs' (LBS.length bs')
                                   | bs' <- shrink bs ]
  shrink  Directory              = []
  shrink (SymbolicLink link)     = [ SymbolicLink link' | link' <- shrink link ]
  shrink (HardLink link)         = [ HardLink link' | link' <- shrink link ]
  shrink (CharacterDevice ma mi) = [ CharacterDevice ma' mi'
                                   | (ma', mi') <- shrink (ma, mi) ]
  shrink (BlockDevice ma mi)     = [ BlockDevice ma' mi'
                                   | (ma', mi') <- shrink (ma, mi) ]
  shrink  NamedPipe              = []
  shrink (OtherEntryType c bs _) = [ OtherEntryType c bs' (LBS.length bs')
                                   | bs' <- shrink bs ]

instance Arbitrary LBS.ByteString where
  arbitrary = LBS.pack <$> arbitrary
  shrink    = map LBS.pack . shrink . LBS.unpack

instance Arbitrary BS.ByteString where
  arbitrary = BS.pack <$> arbitrary
  shrink    = map BS.pack . shrink . BS.unpack
instance Arbitrary Ownership where
  arbitrary = Ownership <$> name <*> name
                        <*> idno <*> idno
    where
      -- Names are at most 32 chars and may not contain NUL, which is the
      -- field terminator in the on-disk format.
      name = listOf0ToN 32 (arbitrary `suchThat` (/= '\0'))
      idno = arbitraryOctal 7

  shrink (Ownership oname gname oid gid) =
    [ Ownership oname' gname' oid' gid'
    | (oname', gname', oid', gid') <- shrink (oname, gname, oid, gid) ]

instance Arbitrary Format where
  arbitrary = elements [V7Format, UstarFormat, GnuFormat]


-- Generates a value that fits in an n-digit octal field, biased towards the
-- boundary cases 0 and the maximum.
-- Signature left commented out as in the original; presumably to let the
-- result type default at call sites — TODO confirm before uncommenting.
--arbitraryOctal :: (Integral n, Random n) => Int -> Gen n
arbitraryOctal n =
    oneof [ pure 0
          , choose (0, upperBound)
          , pure upperBound
          ]
  where
    upperBound = 8^n-1

-- For QC tests it's useful to have a way to limit the info to that which can
-- be expressed in the old V7 format
limitToV7FormatCompat :: Entry -> Entry
limitToV7FormatCompat entry@Entry { entryFormat = V7Format } =
    entry {
      -- V7 has no special entry types; map them to OtherEntryType codes.
      entryContent = case entryContent entry of
        CharacterDevice _ _ -> OtherEntryType  '3' LBS.empty 0
        BlockDevice     _ _ -> OtherEntryType  '4' LBS.empty 0
        Directory           -> OtherEntryType  '5' LBS.empty 0
        NamedPipe           -> OtherEntryType  '6' LBS.empty 0
        other               -> other,

      -- V7 has no owner/group name fields.
      entryOwnership = (entryOwnership entry) {
        groupName = "",
        ownerName = ""
      },

      -- V7 has no prefix field, so keep just the name part of the path.
      entryTarPath = let TarPath name _prefix = entryTarPath entry
                      in TarPath name BS.empty
    }
limitToV7FormatCompat entry = entry

#endif

-- /Codec/Archive/Tar/Index.hs

{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}

-----------------------------------------------------------------------------
-- |
-- Module      :
Codec.Archive.Tar.Index 7 | -- Copyright : (c) 2010-2015 Duncan Coutts 8 | -- License : BSD3 9 | -- 10 | -- Maintainer : duncan@community.haskell.org 11 | -- Portability : portable 12 | -- 13 | -- Random access to the content of a @.tar@ archive. 14 | -- 15 | -- This module uses common names and so is designed to be imported qualified: 16 | -- 17 | -- > import qualified Codec.Archive.Tar.Index as TarIndex 18 | -- 19 | ----------------------------------------------------------------------------- 20 | module Codec.Archive.Tar.Index ( 21 | 22 | -- | The @tar@ format does not contain an index of files within the 23 | -- archive. Normally, @tar@ file have to be processed linearly. It is 24 | -- sometimes useful however to be able to get random access to files 25 | -- within the archive. 26 | -- 27 | -- This module provides an index of a @tar@ file. A linear pass of the 28 | -- @tar@ file is needed to 'build' the 'TarIndex', but thereafter you can 29 | -- 'lookup' paths in the @tar@ file, and then use 'hReadEntry' to 30 | -- seek to the right part of the file and read the entry. 31 | -- 32 | -- An index cannot be used to lookup 'Directory' entries in a tar file; 33 | -- instead, you will get 'TarDir' entry listing all the entries in the 34 | -- directory. 
35 | 36 | -- * Index type 37 | TarIndex, 38 | 39 | -- * Index lookup 40 | lookup, 41 | TarIndexEntry(..), 42 | toList, 43 | 44 | -- ** I\/O operations 45 | TarEntryOffset, 46 | hReadEntry, 47 | hReadEntryHeader, 48 | 49 | -- * Index construction 50 | build, 51 | -- ** Incremental construction 52 | -- $incremental-construction 53 | IndexBuilder, 54 | empty, 55 | addNextEntry, 56 | skipNextEntry, 57 | finalise, 58 | unfinalise, 59 | 60 | -- * Serialising indexes 61 | serialise, 62 | deserialise, 63 | 64 | -- * Lower level operations with offsets and I\/O on tar files 65 | hReadEntryHeaderOrEof, 66 | hSeekEntryOffset, 67 | hSeekEntryContentOffset, 68 | hSeekEndEntryOffset, 69 | nextEntryOffset, 70 | indexEndEntryOffset, 71 | indexNextEntryOffset, 72 | 73 | -- * Deprecated aliases 74 | emptyIndex, 75 | finaliseIndex, 76 | 77 | #ifdef TESTS 78 | prop_lookup, 79 | prop_toList, 80 | prop_valid, 81 | prop_serialise_deserialise, 82 | prop_serialiseSize, 83 | prop_index_matches_tar, 84 | prop_finalise_unfinalise, 85 | #endif 86 | ) where 87 | 88 | import Data.Typeable (Typeable) 89 | 90 | import Codec.Archive.Tar.Types as Tar 91 | import Codec.Archive.Tar.Read as Tar 92 | import qualified Codec.Archive.Tar.Index.StringTable as StringTable 93 | import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder) 94 | import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie 95 | import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder) 96 | 97 | import qualified System.FilePath.Posix as FilePath 98 | import Data.Monoid (Monoid(..)) 99 | #if (MIN_VERSION_base(4,5,0)) 100 | import Data.Monoid ((<>)) 101 | #endif 102 | import Data.Word 103 | import Data.Int 104 | import Data.Bits 105 | import qualified Data.Array.Unboxed as A 106 | import Prelude hiding (lookup) 107 | import System.IO 108 | import Control.Exception (assert, throwIO) 109 | import Control.DeepSeq 110 | 111 | import qualified Data.ByteString as BS 112 | import qualified Data.ByteString.Char8 as 
import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString.Unsafe as BS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder       as BS
import Data.ByteString.Builder.Extra as BS (toLazyByteStringWith,
                                            untrimmedStrategy)
#else
import Data.ByteString.Lazy.Builder        as BS
import Data.ByteString.Lazy.Builder.Extras as BS (toLazyByteStringWith,
                                                  untrimmedStrategy)
#endif

#ifdef TESTS
import qualified Prelude
import Test.QuickCheck
import Test.QuickCheck.Property (ioProperty)
import Control.Applicative ((<$>), (<*>))
import Control.Monad (unless)
import Data.List (nub, sort, sortBy, stripPrefix, isPrefixOf)
import Data.Maybe
import Data.Function (on)
import Control.Exception (SomeException, try)
import Codec.Archive.Tar.Write as Tar
import qualified Data.ByteString.Handle as HBS
#endif


-- | An index of the entries in a tar file.
--
-- This index type is designed to be quite compact and suitable to store either
-- on disk or in memory.
--
data TarIndex = TarIndex

  -- As an example of how the mapping works, consider these example files:
  --   "foo/bar.hs" at offset 0
  --   "foo/baz.hs" at offset 1024
  --
  -- We split the paths into components and enumerate them.
  --   { "foo" -> TokenId 0, "bar.hs" -> TokenId 1, "baz.hs" -> TokenId 2 }
  --
  -- We convert paths into sequences of 'TokenId's, i.e.
  --   "foo/bar.hs" becomes [PathComponentId 0, PathComponentId 1]
  --   "foo/baz.hs" becomes [PathComponentId 0, PathComponentId 2]
  --
  -- We use a trie mapping sequences of 'PathComponentId's to the entry offset:
  --  { [PathComponentId 0, PathComponentId 1] -> offset 0
  --  , [PathComponentId 0, PathComponentId 2] -> offset 1024 }

  -- The mapping of filepath components as strings to ids.
  {-# UNPACK #-} !(StringTable PathComponentId)

  -- Mapping of sequences of filepath component ids to tar entry offsets.
  {-# UNPACK #-} !(IntTrie PathComponentId TarEntryOffset)

  -- The offset immediately after the last entry, where we would append any
  -- additional entries.
  {-# UNPACK #-} !TarEntryOffset

  deriving (Eq, Show, Typeable)

instance NFData TarIndex where
  rnf (TarIndex _ _ _) = () -- fully strict by construction

-- | The result of 'lookup' in a 'TarIndex'. It can either be a file directly,
-- or a directory entry containing further entries (and all subdirectories
-- recursively). Note that the subtrees are constructed lazily, so it's
-- cheaper if you don't look at them.
--
data TarIndexEntry = TarFileEntry {-# UNPACK #-} !TarEntryOffset
                   | TarDir [(FilePath, TarIndexEntry)]
  deriving (Show, Typeable)


-- Internal: dense numbering of distinct path components, used as keys in
-- both the string table and the trie.
newtype PathComponentId = PathComponentId Int
  deriving (Eq, Ord, Enum, Show, Typeable)

-- | An offset within a tar file. Use 'hReadEntry', 'hReadEntryHeader' or
-- 'hSeekEntryOffset'.
--
-- This is actually a tar \"record\" number, not a byte offset.
--
type TarEntryOffset = Word32
-- | Look up a given filepath in the 'TarIndex'. It may return a 'TarFileEntry'
-- containing the 'TarEntryOffset' of the file within the tar file, or if
-- the filepath identifies a directory then it returns a 'TarDir' containing
-- the list of files within that directory.
--
-- Given the 'TarEntryOffset' you can then use one of the I\/O operations:
--
-- * 'hReadEntry' to read the whole entry;
--
-- * 'hReadEntryHeader' to read just the file metadata (e.g. its length);
--
lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry
lookup (TarIndex strTable trie _) path =
    fmap convert (IntTrie.lookup trie =<< toComponentIds strTable path)
  where
    -- Translate the trie's view of a result back into filepath terms.
    convert (IntTrie.Entry offset)       = TarFileEntry offset
    convert (IntTrie.Completions compls) =
      TarDir [ (fromComponentId strTable key, convert entry)
             | (key, entry) <- compls ]


-- Translate a filepath into the sequence of component ids used by the trie,
-- or Nothing if any component is not in the string table (in which case the
-- path cannot be in the index at all). "." components are dropped, matching
-- how paths are stored.
toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId]
toComponentIds table =
    mapM (StringTable.lookup table)
  . filter (/= BS.Char8.singleton '.')
  . splitDirectories
  . BS.Char8.pack

-- Inverse of 'toComponentIds' for a single component.
fromComponentId :: StringTable PathComponentId -> PathComponentId -> FilePath
fromComponentId table = BS.Char8.unpack . StringTable.index table

-- | All the files in the index with their corresponding 'TarEntryOffset's.
--
-- Note that the files are in no special order. If you intend to read all or
-- most files then it is recommended to sort by the 'TarEntryOffset'.
--
toList :: TarIndex -> [(FilePath, TarEntryOffset)]
toList (TarIndex pathTable pathTrie _) =
    [ (path, off)
    | (cids, off) <- IntTrie.toList pathTrie
    , let path = FilePath.joinPath (map (fromComponentId pathTable) cids) ]


-- | Build a 'TarIndex' from a sequence of tar 'Entries'. The 'Entries' are
-- assumed to start at offset @0@ within a file.
--
build :: Entries e -> Either e TarIndex
build = go empty
  where
    -- Strict accumulator: force the builder at each step so we don't pile
    -- up thunks over a large archive.
    go !builder (Next e es) = go (addNextEntry e builder) es
    go !builder Done        = Right $! finalise builder
    go !_       (Fail err)  = Left err


-- $incremental-construction
-- If you need more control than 'build' then you can construct the index
-- in an accumulator style using the 'IndexBuilder' and operations.
--
-- Start with 'empty' and use 'addNextEntry' (or 'skipNextEntry') for
-- each 'Entry' in the tar file in order. Every entry must be added or skipped
-- in order, otherwise the resulting 'TarIndex' will report the wrong
-- 'TarEntryOffset's. At the end use 'finalise' to get the 'TarIndex'.
--
-- For example, 'build' is simply:
--
-- > build = go empty
-- >   where
-- >     go !builder (Next e es) = go (addNextEntry e builder) es
-- >     go !builder Done        = Right $! finalise builder
-- >     go !_       (Fail err)  = Left err


-- | The intermediate type used for incremental construction of a 'TarIndex'.
--
data IndexBuilder
   = IndexBuilder !(StringTableBuilder PathComponentId)
                  !(IntTrieBuilder PathComponentId TarEntryOffset)
   {-# UNPACK #-} !TarEntryOffset
  deriving (Eq, Show)

instance NFData IndexBuilder where
  rnf (IndexBuilder _ _ _) = () -- fully strict by construction

-- | The initial empty 'IndexBuilder'.
--
empty :: IndexBuilder
empty = IndexBuilder StringTable.empty IntTrie.empty 0

emptyIndex :: IndexBuilder
emptyIndex = empty
{-# DEPRECATED emptyIndex "Use TarIndex.empty" #-}

-- | Add the next 'Entry' into the 'IndexBuilder'.
--
addNextEntry :: Entry -> IndexBuilder -> IndexBuilder
addNextEntry entry (IndexBuilder strTbl trie offset) =
    IndexBuilder strTbl' trie' offset'
  where
    -- Record this entry's path against its own offset, then advance the
    -- offset past this entry for the next one.
    !components     = splitTarPath (entryTarPath entry)
    (strTbl', cids) = StringTable.inserts components strTbl
    trie'           = IntTrie.insert cids offset trie
    offset'         = nextEntryOffset entry offset

-- | Use this function if you want to skip some entries and not add them to the
-- final 'TarIndex'.
--
skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder
skipNextEntry entry (IndexBuilder strTbl trie offset) =
    IndexBuilder strTbl trie (nextEntryOffset entry offset)

-- | Finish accumulating 'Entry' information and build the compact 'TarIndex'
-- lookup structure.
--
finalise :: IndexBuilder -> TarIndex
finalise (IndexBuilder strTbl trie finalOffset) =
    TarIndex (StringTable.finalise strTbl)
             (IntTrie.finalise trie)
             finalOffset

finaliseIndex :: IndexBuilder -> TarIndex
finaliseIndex = finalise
{-# DEPRECATED finaliseIndex "Use TarIndex.finalise" #-}

-- | This is the offset immediately following the entry most recently added
-- to the 'IndexBuilder'. You might use this if you need to know the offsets
-- but don't want to use the 'TarIndex' lookup structure.
-- Use with 'hSeekEntryOffset'. See also 'nextEntryOffset'.
--
indexNextEntryOffset :: IndexBuilder -> TarEntryOffset
indexNextEntryOffset (IndexBuilder _ _ off) = off

-- | This is the offset immediately following the last entry in the tar file.
-- This can be useful to append further entries into the tar file.
-- Use with 'hSeekEntryOffset', or just use 'hSeekEndEntryOffset' directly.
--
indexEndEntryOffset :: TarIndex -> TarEntryOffset
indexEndEntryOffset (TarIndex _ _ off) = off

-- | Calculate the 'TarEntryOffset' of the next entry, given the size and
-- offset of the current entry.
--
-- This is much like using 'skipNextEntry' and 'indexNextEntryOffset', but
-- without using an 'IndexBuilder'.
--
nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset
nextEntryOffset entry offset =
    offset + 1 + contentBlocks   -- the 1 is the entry's own header block
  where
    contentBlocks = case entryContent entry of
      NormalFile     _   size -> blocks size
      OtherEntryType _ _ size -> blocks size
      _                       -> 0

    -- Number of 512-byte blocks, rounded up.
    -- NOTE: to avoid underflow, do the (fromIntegral :: Int64 -> Word32)
    -- last — (size - 1) would wrap for size == 0 if done in Word32.
    blocks :: Int64 -> TarEntryOffset
    blocks size = fromIntegral (1 + (size - 1) `div` 512)

type FilePathBS = BS.ByteString

-- All the path components of an entry: prefix field first, then name field.
splitTarPath :: TarPath -> [FilePathBS]
splitTarPath (TarPath name prefix) =
    splitDirectories prefix ++ splitDirectories name

-- Split on '/', dropping empty components, but keeping a leading '/' as its
-- own component for absolute paths.
splitDirectories :: FilePathBS -> [FilePathBS]
splitDirectories bs =
    case BS.Char8.split '/' bs of
      c:cs | BS.null c -> BS.Char8.singleton '/' : dropEmpty cs
      cs               -> dropEmpty cs
  where
    dropEmpty = filter (not . BS.null)


-------------------------
-- Resume building an existing index
--

-- | Resume building an existing index
--
-- A 'TarIndex' is optimized for a highly compact and efficient in-memory
-- representation. This, however, makes it read-only.
-- If you have an existing
-- 'TarIndex' for a large file, and want to add to it, you can translate the
-- 'TarIndex' back to an 'IndexBuilder'. Be aware that this is a relatively
-- costly operation (linear in the size of the 'TarIndex'), though still
-- faster than starting again from scratch.
--
-- This is the left inverse to 'finalise' (modulo ordering).
--
unfinalise :: TarIndex -> IndexBuilder
unfinalise (TarIndex pathTable pathTrie finalOffset) =
    IndexBuilder (StringTable.unfinalise pathTable)
                 (IntTrie.unfinalise pathTrie)
                 finalOffset


-------------------------
-- I/O operations
--

-- | Reads an entire 'Entry' at the given 'TarEntryOffset' in the tar file.
-- The 'Handle' must be open for reading and be seekable.
--
-- This reads the whole entry into memory strictly, not incrementally. For more
-- control, use 'hReadEntryHeader' and then read the entry content manually.
--
hReadEntry :: Handle -> TarEntryOffset -> IO Entry
hReadEntry hnd off = do
    entry <- hReadEntryHeader hnd off
    -- hReadEntryHeader leaves the handle at the start of the content, so
    -- fill in the real body for the content-carrying entry types.
    case entryContent entry of
      NormalFile _ size       -> do
        body <- readBody size
        return entry { entryContent = NormalFile body size }
      OtherEntryType c _ size -> do
        body <- readBody size
        return entry { entryContent = OtherEntryType c body size }
      _                       -> return entry
  where
    readBody size = LBS.hGet hnd (fromIntegral size)

-- | Read the header for a 'Entry' at the given 'TarEntryOffset' in the tar
-- file. The 'entryContent' will contain the correct metadata but an empty file
-- content. The 'Handle' must be open for reading and be seekable.
--
-- The 'Handle' position is advanced to the beginning of the entry content (if
-- any). You must check the 'entryContent' to see if the entry is of type
-- 'NormalFile'.
-- If it is, the 'NormalFile' gives the content length and you
-- are free to read this much data from the 'Handle'.
--
-- > entry <- Tar.hReadEntryHeader hnd
-- > case Tar.entryContent entry of
-- >   Tar.NormalFile _ size -> do content <- BS.hGet hnd size
-- >                               ...
--
-- Of course you don't have to read it all in one go (as 'hReadEntry' does),
-- you can use any appropriate method to read it incrementally.
--
-- In addition to I\/O errors, this can throw a 'FormatError' if the offset is
-- wrong, or if the file is not valid tar format.
--
-- There is also the lower level operation 'hSeekEntryOffset'.
--
hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry
hReadEntryHeader hnd blockOff = do
    hSeekEntryOffset hnd blockOff
    headerBlock <- LBS.hGet hnd 512
    case Tar.read headerBlock of
      Tar.Next entry _ -> return entry
      Tar.Fail e       -> throwIO e
      -- presumably unreachable for a full header block — 'Tar.read' is
      -- expected to produce an entry or an error here
      Tar.Done         -> fail "hReadEntryHeader: impossible"

-- | Set the 'Handle' position to the position corresponding to the given
-- 'TarEntryOffset'.
--
-- This position is where the entry metadata can be read. If you already know
-- the entry has a body (and perhaps know it's length), you may wish to seek to
-- the body content directly using 'hSeekEntryContentOffset'.
--
hSeekEntryOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryOffset hnd blockOff =
    -- offsets count 512-byte blocks, hSeek wants bytes
    hSeek hnd AbsoluteSeek (fromIntegral blockOff * 512)

-- | Set the 'Handle' position to the entry content position corresponding to
-- the given 'TarEntryOffset'.
--
-- This position is where the entry content can be read using ordinary I\/O
-- operations (though you have to know in advance how big the entry content
-- is). This is /only valid/ if you /already know/ the entry has a body (i.e.
-- is a normal file).
--
hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryContentOffset hnd blockOff =
    -- the content starts one 512-byte block after the header
    hSeekEntryOffset hnd (blockOff + 1)

-- | This is a low level variant on 'hReadEntryHeader', that can be used to
-- iterate through a tar file, entry by entry.
--
-- It has a few differences compared to 'hReadEntryHeader':
--
-- * It returns an indication when the end of the tar file is reached.
--
-- * It /does not/ move the 'Handle' position to the beginning of the entry
--   content.
--
-- * It returns the 'TarEntryOffset' of the next entry.
--
-- After this action, the 'Handle' position is not in any useful place. If
-- you want to skip to the next entry, take the 'TarEntryOffset' returned and
-- use 'hReadEntryHeaderOrEof' again. Or if having inspected the 'Entry'
-- header you want to read the entry content (if it has one) then use
-- 'hSeekEntryContentOffset' on the original input 'TarEntryOffset'.
--
hReadEntryHeaderOrEof :: Handle -> TarEntryOffset
                      -> IO (Maybe (Entry, TarEntryOffset))
hReadEntryHeaderOrEof hnd blockOff = do
    hSeekEntryOffset hnd blockOff
    -- 1024 bytes rather than 512: presumably so 'Tar.read' can also see the
    -- end-of-archive marker and report 'Done' — confirm against Tar.read.
    chunk <- LBS.hGet hnd 1024
    case Tar.read chunk of
      Tar.Next entry _ -> let !nextOff = nextEntryOffset entry blockOff
                           in return (Just (entry, nextOff))
      Tar.Done         -> return Nothing
      Tar.Fail e       -> throwIO e

-- | Seek to the end of a tar file, to the position where new entries can
-- be appended, and return that 'TarEntryOffset'.
--
-- If you have a valid 'TarIndex' for this tar file then you should supply it
-- because it allows seeking directly to the correct location.
--
-- If you do not have an index, then this becomes an expensive linear
-- operation because we have to read each tar entry header from the beginning
-- to find the location immediately after the last entry (this is because tar
-- files have a variable length trailer and we cannot reliably find that by
-- starting at the end). In this mode, it will fail with an exception if the
-- file is not in fact in the tar format.
--
hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset
hSeekEndEntryOffset hnd (Just index) = do
    -- Fast path: the index records exactly where the last entry ends.
    let offset = indexEndEntryOffset index
    hSeekEntryOffset hnd offset
    return offset

hSeekEndEntryOffset hnd Nothing = do
    size <- hFileSize hnd
    if size == 0
      then return 0          -- empty file: append position is the start
      else scanForward 0
  where
    -- Walk the entry headers one by one until the archive trailer.
    scanForward offset = do
      next <- hReadEntryHeaderOrEof hnd offset
      case next of
        Just (_, offset') -> scanForward offset'
        Nothing           -> do hSeekEntryOffset hnd offset
                                return offset

-------------------------
-- (de)serialisation
--

-- | The 'TarIndex' is compact in memory, and it has a similarly compact
-- external representation.
--
serialise :: TarIndex -> BS.ByteString
serialise = toStrict . serialiseLBS

-- we keep this version around just so we can check we got the size right.
serialiseLBS :: TarIndex -> LBS.ByteString
serialiseLBS index =
    BS.toLazyByteStringWith
      (BS.untrimmedStrategy (serialiseSize index) 512) LBS.empty
      (serialiseBuilder index)

-- | Exact number of bytes 'serialiseBuilder' emits: an 8-byte header
-- (version word + final offset word) plus the two sub-structures.
serialiseSize :: TarIndex -> Int
serialiseSize (TarIndex stringTable intTrie _) =
    StringTable.serialiseSize stringTable
  + IntTrie.serialiseSize intTrie
  + 8

serialiseBuilder :: TarIndex -> BS.Builder
serialiseBuilder (TarIndex stringTable intTrie finalOffset) =
     BS.word32BE 2 -- format version
  <> BS.word32BE finalOffset
  <> StringTable.serialise stringTable
  <> IntTrie.serialise intTrie

-- | Read the external representation back into a 'TarIndex'.
--
-- Accepts both the old v1 and the current v2 format. The two differ only in
-- the string-table encoding, so the common framing is factored out below
-- (previously the two branches were duplicated in full).
--
deserialise :: BS.ByteString -> Maybe (TarIndex, BS.ByteString)
deserialise bs
  | BS.length bs < 8 = Nothing
  | otherwise =
      case readWord32BE bs 0 of
        1 -> deserialiseBody StringTable.deserialiseV1
        2 -> deserialiseBody StringTable.deserialiseV2
        _ -> Nothing   -- unknown format version
  where
    -- Shared framing: final offset, then string table, then int trie;
    -- any remaining bytes are returned to the caller.
    deserialiseBody deserialiseStringTable = do
      let !finalOffset = readWord32BE bs 4
      (stringTable, bs')  <- deserialiseStringTable (BS.drop 8 bs)
      (intTrie,     bs'') <- IntTrie.deserialise bs'
      return (TarIndex stringTable intTrie finalOffset, bs'')

-- | Big-endian 'Word32' at byte offset @i@. Bounds are checked only via
-- 'assert' (disabled in production builds), hence the unsafe indexing.
readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE bs i =
    assert (i >= 0 && i+3 <= BS.length bs - 1) $
      fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
    + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
    + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
    + fromIntegral (BS.unsafeIndex bs (i + 3))


-------------------------
-- Test properties
--

#ifdef TESTS

-- Not quite the properties of
-- a finite mapping because we also have lookups
-- that result in completions.

-- A file lookup must agree with the association list the index was built
-- from; a directory lookup must list exactly the next path components of
-- every stored path extending the query.
prop_lookup :: ValidPaths -> NonEmptyFilePath -> Bool
prop_lookup (ValidPaths paths) (NonEmptyFilePath p) =
  case (lookup index p, Prelude.lookup p paths) of
    (Nothing,                    Nothing)          -> True
    (Just (TarFileEntry offset), Just (_,offset')) -> offset == offset'
    (Just (TarDir entries),      Nothing)          -> sort (nub (map fst entries))
                                                  == sort (nub completions)
    _                                              -> False
  where
    index       = construct paths
    completions = [ head (FilePath.splitDirectories completion)
                  | (path,_) <- paths
                  , completion <- maybeToList $ stripPrefix (p ++ "/") path ]

-- The index enumerates exactly the paths and offsets it was built from.
prop_toList :: ValidPaths -> Bool
prop_toList (ValidPaths paths) =
    sort (toList index)
 == sort [ (path, off) | (path, (_sz, off)) <- paths ]
  where
    index = construct paths

-- Validates the index's sub-structures as well as its own lookups; uses
-- 'error' rather than False so failures identify which part broke.
prop_valid :: ValidPaths -> Bool
prop_valid (ValidPaths paths)
  | not $ StringTable.prop_valid   pathbits = error "TarIndex: bad string table"
  | not $ IntTrie.prop_lookup      intpaths = error "TarIndex: bad int trie"
  | not $ IntTrie.prop_completions intpaths = error "TarIndex: bad int trie"
  | not $ prop'                             = error "TarIndex: bad prop"
  | otherwise                               = True

  where
    index@(TarIndex pathTable _ _) = construct paths

    pathbits = concatMap (map BS.Char8.pack . FilePath.splitDirectories . fst)
                         paths
    intpaths = [ (cids, offset)
               | (path, (_size, offset)) <- paths
               , let Just cids = toComponentIds pathTable path ]
    prop' = flip all paths $ \(file, (_size, offset)) ->
      case lookup index file of
        Just (TarFileEntry offset') -> offset' == offset
        _                           -> False

-- Round-trip: deserialising a serialised index yields the same index with
-- no leftover bytes.
prop_serialise_deserialise :: ValidPaths -> Bool
prop_serialise_deserialise (ValidPaths paths) =
    Just (index, BS.empty) == (deserialise . serialise) index
  where
    index = construct paths
serialise) index 653 | where 654 | index = construct paths 655 | 656 | prop_serialiseSize :: ValidPaths -> Bool 657 | prop_serialiseSize (ValidPaths paths) = 658 | case (LBS.toChunks . serialiseLBS) index of 659 | [c1] -> BS.length c1 == serialiseSize index 660 | _ -> False 661 | where 662 | index = construct paths 663 | 664 | newtype NonEmptyFilePath = NonEmptyFilePath FilePath deriving Show 665 | 666 | instance Arbitrary NonEmptyFilePath where 667 | arbitrary = NonEmptyFilePath . FilePath.joinPath 668 | <$> listOf1 (elements ["a", "b", "c", "d"]) 669 | 670 | newtype ValidPaths = ValidPaths [(FilePath, (Int64, TarEntryOffset))] deriving Show 671 | 672 | instance Arbitrary ValidPaths where 673 | arbitrary = do 674 | paths <- makeNoPrefix <$> listOf arbitraryPath 675 | sizes <- vectorOf (length paths) (getNonNegative <$> arbitrary) 676 | let offsets = scanl (\o sz -> o + 1 + blocks sz) 0 sizes 677 | return (ValidPaths (zip paths (zip sizes offsets))) 678 | where 679 | arbitraryPath = FilePath.joinPath 680 | <$> listOf1 (elements ["a", "b", "c", "d"]) 681 | makeNoPrefix [] = [] 682 | makeNoPrefix (k:ks) 683 | | all (not . isPrefixOfOther k) ks 684 | = k : makeNoPrefix ks 685 | | otherwise = makeNoPrefix ks 686 | 687 | isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a 688 | 689 | blocks :: Int64 -> TarEntryOffset 690 | blocks size = fromIntegral (1 + ((size - 1) `div` 512)) 691 | 692 | -- Helper for bulk construction. 693 | construct :: [(FilePath, (Int64, TarEntryOffset))] -> TarIndex 694 | construct = 695 | either (\_ -> undefined) id 696 | . build 697 | . 
foldr (\(path, (size, _off)) es -> Next (testEntry path size) es) Done 698 | 699 | example0 :: Entries () 700 | example0 = 701 | testEntry "foo-1.0/foo-1.0.cabal" 1500 -- at block 0 702 | `Next` testEntry "foo-1.0/LICENSE" 2000 -- at block 4 703 | `Next` testEntry "foo-1.0/Data/Foo.hs" 1000 -- at block 9 704 | `Next` Done 705 | 706 | example1 :: Entries () 707 | example1 = 708 | Next (testEntry "./" 1500) Done <> example0 709 | 710 | testEntry :: FilePath -> Int64 -> Entry 711 | testEntry name size = simpleEntry path (NormalFile mempty size) 712 | where 713 | Right path = toTarPath False name 714 | 715 | -- | Simple tar archive containing regular files only 716 | data SimpleTarArchive = SimpleTarArchive { 717 | simpleTarEntries :: Tar.Entries () 718 | , simpleTarRaw :: [(FilePath, LBS.ByteString)] 719 | , simpleTarBS :: LBS.ByteString 720 | } 721 | 722 | instance Show SimpleTarArchive where 723 | show = show . simpleTarRaw 724 | 725 | prop_index_matches_tar :: SimpleTarArchive -> Property 726 | prop_index_matches_tar sta = 727 | ioProperty (try go >>= either (\e -> throwIO (e :: SomeException)) 728 | (\_ -> return True)) 729 | where 730 | go :: IO () 731 | go = do 732 | h <- HBS.readHandle True (simpleTarBS sta) 733 | goEntries h 0 (simpleTarEntries sta) 734 | 735 | goEntries :: Handle -> TarEntryOffset -> Tar.Entries () -> IO () 736 | goEntries _ _ Tar.Done = 737 | return () 738 | goEntries _ _ (Tar.Fail _) = 739 | throwIO (userError "Fail entry in SimpleTarArchive") 740 | goEntries h offset (Tar.Next e es) = do 741 | goEntry h offset e 742 | goEntries h (nextEntryOffset e offset) es 743 | 744 | goEntry :: Handle -> TarEntryOffset -> Tar.Entry -> IO () 745 | goEntry h offset e = do 746 | e' <- hReadEntry h offset 747 | case (Tar.entryContent e, Tar.entryContent e') of 748 | (Tar.NormalFile bs sz, Tar.NormalFile bs' sz') -> 749 | unless (sz == sz' && bs == bs') $ 750 | throwIO $ userError "Entry mismatch" 751 | _otherwise -> 752 | throwIO $ userError "unexpected 
entry types" 753 | 754 | instance Arbitrary SimpleTarArchive where 755 | arbitrary = do 756 | numEntries <- sized $ \n -> choose (0, n) 757 | rawEntries <- mkRaw numEntries 758 | let entries = mkList rawEntries 759 | return SimpleTarArchive { 760 | simpleTarEntries = mkEntries entries 761 | , simpleTarRaw = rawEntries 762 | , simpleTarBS = Tar.write entries 763 | } 764 | where 765 | mkRaw :: Int -> Gen [(FilePath, LBS.ByteString)] 766 | mkRaw 0 = return [] 767 | mkRaw n = do 768 | -- Pick a size around 0, 1, or 2 block boundaries 769 | sz <- sized $ \n -> elements (take n fileSizes) 770 | bs <- LBS.pack `fmap` vectorOf sz arbitrary 771 | es <- mkRaw (n - 1) 772 | return $ ("file" ++ show n, bs) : es 773 | 774 | mkList :: [(FilePath, LBS.ByteString)] -> [Tar.Entry] 775 | mkList [] = [] 776 | mkList ((fp, bs):es) = entry : mkList es 777 | where 778 | Right path = toTarPath False fp 779 | entry = simpleEntry path content 780 | content = NormalFile bs (LBS.length bs) 781 | 782 | mkEntries :: [Tar.Entry] -> Tar.Entries () 783 | mkEntries [] = Tar.Done 784 | mkEntries (e:es) = Tar.Next e (mkEntries es) 785 | 786 | -- Sizes around 0, 1, and 2 block boundaries 787 | fileSizes :: [Int] 788 | fileSizes = [ 789 | 0 , 1 , 2 790 | , 510 , 511 , 512 , 513 , 514 791 | , 1022 , 1023 , 1024 , 1025 , 1026 792 | ] 793 | 794 | -- | 'IndexBuilder' constructed from a 'SimpleIndex' 795 | newtype SimpleIndexBuilder = SimpleIndexBuilder IndexBuilder 796 | deriving Show 797 | 798 | instance Arbitrary SimpleIndexBuilder where 799 | arbitrary = SimpleIndexBuilder . build' . 
simpleTarEntries <$> arbitrary 800 | where 801 | -- like 'build', but don't finalize 802 | build' :: Show e => Entries e -> IndexBuilder 803 | build' = go empty 804 | where 805 | go !builder (Next e es) = go (addNextEntry e builder) es 806 | go !builder Done = builder 807 | go !_ (Fail err) = error (show err) 808 | 809 | prop_finalise_unfinalise :: SimpleIndexBuilder -> Bool 810 | prop_finalise_unfinalise (SimpleIndexBuilder index) = 811 | unfinalise (finalise index) == index 812 | 813 | #endif 814 | 815 | toStrict :: LBS.ByteString -> BS.ByteString 816 | #if MIN_VERSION_bytestring(0,10,0) 817 | toStrict = LBS.toStrict 818 | #else 819 | toStrict = BS.concat . LBS.toChunks 820 | #endif 821 | 822 | #if !(MIN_VERSION_base(4,5,0)) 823 | (<>) :: Monoid m => m -> m -> m 824 | (<>) = mappend 825 | #endif 826 | --------------------------------------------------------------------------------