├── .gitignore ├── .travis.yml ├── ChangeLog.md ├── LICENSE ├── README.md ├── Setup.hs ├── bisect-binary.cabal └── src ├── Braille.hs ├── Intervals.hs └── Main.hs /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-* 3 | cabal-dev 4 | *.o 5 | *.hi 6 | *.chi 7 | *.chs.h 8 | *.dyn_o 9 | *.dyn_hi 10 | .hpc 11 | .hsenv 12 | .cabal-sandbox/ 13 | cabal.sandbox.config 14 | *.prof 15 | *.aux 16 | *.hp 17 | *.eventlog 18 | .stack-work/ 19 | cabal.project.local 20 | cabal.project.local~ 21 | .HTF/ 22 | .ghc.environment.* 23 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This Travis job script has been generated by a script via 2 | # 3 | # make_travis_yml_2.hs 'bisect-binary.cabal' '-o' '.travis.yml' 4 | # 5 | # For more information, see https://github.com/hvr/multi-ghc-travis 6 | # 7 | language: c 8 | sudo: false 9 | 10 | git: 11 | submodules: false # whether to recursively clone submodules 12 | 13 | cache: 14 | directories: 15 | - $HOME/.cabal/packages 16 | - $HOME/.cabal/store 17 | 18 | before_cache: 19 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/build-reports.log 20 | # remove files that are regenerated by 'cabal update' 21 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/00-index.* 22 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/*.json 23 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/01-index.cache 24 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/01-index.tar 25 | - rm -fv $HOME/.cabal/packages/hackage.haskell.org/01-index.tar.idx 26 | 27 | matrix: 28 | include: 29 | - compiler: "ghc-8.0.2" 30 | # env: TEST=--disable-tests BENCH=--disable-benchmarks 31 | addons: {apt: {packages: [ghc-ppa-tools,cabal-install-head,ghc-8.0.2], sources: [hvr-ghc]}} 32 | - compiler: "ghc-8.2.2" 33 | # env: TEST=--disable-tests BENCH=--disable-benchmarks 34 | addons: {apt: {packages: [ghc-ppa-tools,cabal-install-head,ghc-8.2.2], sources: [hvr-ghc]}} 35 | 36 | before_install: 37 | - HC=${CC} 38 | - HCPKG=${HC/ghc/ghc-pkg} 39 | - unset CC 40 | - PATH=/opt/ghc/bin:/opt/ghc-ppa-tools/bin:$PATH 41 | 42 | install: 43 | - cabal --version 44 | - echo "$(${HC} --version) [$(${HC} --print-project-git-commit-id 2> /dev/null || echo '?')]" 45 | - BENCH=${BENCH---enable-benchmarks} 46 | - TEST=${TEST---enable-tests} 47 | - HADDOCK=${HADDOCK-true} 48 | - INSTALLED=${INSTALLED-true} 49 | - travis_retry cabal update -v 50 | - sed -i.bak 's/^jobs:/-- jobs:/' ${HOME}/.cabal/config 51 | - rm -fv cabal.project.local 52 | - "echo 'packages: .' > cabal.project" 53 | - if [ -f "./configure.ac" ]; then 54 | (cd "."; autoreconf -i); 55 | fi 56 | - rm -f cabal.project.freeze 57 | - cabal new-build -w ${HC} ${TEST} ${BENCH} --project-file="cabal.project" --dep -j2 all 58 | - cabal new-build -w ${HC} --disable-tests --disable-benchmarks --project-file="cabal.project" --dep -j2 all 59 | - rm -rf "."/.ghc.environment.* "."/dist 60 | - DISTDIR=$(mktemp -d /tmp/dist-test.XXXX) 61 | 62 | # Here starts the actual work to be performed for the package under test; 63 | # any command which exits with a non-zero exit code causes the build to fail. 64 | script: 65 | # test that source-distributions can be generated 66 | - (cd "."; cabal sdist) 67 | - mv "."/dist/bisect-binary-*.tar.gz ${DISTDIR}/ 68 | - cd ${DISTDIR} 69 | - find . -maxdepth 1 -name '*.tar.gz' -exec tar -xvf '{}' \; 70 | - "printf 'packages: bisect-binary-*/*.cabal\n' > cabal.project" 71 | # this builds all libraries and executables (without tests/benchmarks) 72 | - cabal new-build -w ${HC} --disable-tests --disable-benchmarks all 73 | 74 | # Build with installed constraints for packages in global-db 75 | - if $INSTALLED; then 76 | echo cabal new-build -w ${HC} --disable-tests --disable-benchmarks $(${HCPKG} list --global --simple-output --names-only | sed 's/\([a-zA-Z0-9-]\{1,\}\) */--constraint="\1 installed" /g') all | sh; 77 | else echo "Not building with installed constraints"; fi 78 | 79 | # build & run tests, build benchmarks 80 | - cabal new-build -w ${HC} ${TEST} ${BENCH} all 81 | 82 | # REGENDATA ["bisect-binary.cabal","-o",".travis.yml"] 83 | # EOF 84 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Revision history for bisect-binary 2 | 3 | ## 0.1.0.1 4 | 5 | * Fix bugs in `Intervals.hs` found by formal verification with Coq. 6 | 7 | ## 0.1 8 | 9 | * First version. 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Joachim Breitner 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | bisect-binary 2 | ============= 3 | 4 | This little program helps with the following task: 5 | 6 | You need to analyse some binary file (e.g., the firmware for some 7 | microcontroller). You want to understand how it does certain things, but the 8 | file is big. One approach to help you in the process is to erase parts of the file 9 | (by overwriting it with binary zeros) and see if the file still does what it 10 | should be doing. If it does, then you know that the interesting parts are in 11 | another part of the file. 12 | 13 | `bisect-binary` assists in this process by doing the book-keeping and zeroing 14 | out parts of the file. 15 | 16 | 17 | Screenshot 18 | ---------- 19 | 20 | The following screenshot shows `bisect-binary` in action, as it determins which 21 | part of a “Hello World” program written in Haskell are actually needed: 22 | It is taken from a [Asciinema demo], and edited to fit your screen. 23 | 24 | ``` 25 | $ bisect-binary -i hello -o hello-test -c 'chmod +x hello-test; timeout -k2s 2s ./hello-test 26 | … 27 | 12.7% zeroes 131072B new 897000B left [⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠛⠛⠛⠛⠛⠛⠁⠀⠀⠀⠀⠀⠀⠀⠀⣠⣤⣤⣤⣤⣤⣤⡄] [YNRUQ?] n 28 | Segmentation fault 29 | 12.7% zeroes 131072B new 897000B left [⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⠛⠛⠛⠛⠛⠋⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣠⣤⣤⣤⣤⣤⣤⡄] [YNRUQ?] n 30 | hello-test: internal error: stg_ap_p_ret 31 | (GHC version 8.0.2 for x86_64_unknown_linux) 32 | Please report this as a GHC bug: http://www.haskell.org/ghc/reportabug 33 | Aborted 34 | 12.7% zeroes 131072B new 897000B left [⠀⠀⠀⠀⠀⠈⠛⠛⠛⠛⠛⠛⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣠⣤⣤⣤⣤⣤⣤⡄] [YNRUQ?] n 35 | Hello World! 36 | 12.7% zeroes 65536B new 897000B left [⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠛⣻⣿⣥⣤⣤⣤⣤⡄] [YNRUQ?] y 37 | Hello World! 38 | 15.9% zeroes 65536B new 864232B left [⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⠛⠛⢋⣤⣤⣤⣤⣤⣤⣤⣤⡄] [YNRUQ?] y 39 | ``` 40 | 41 | [Asciinema demo]: https://asciinema.org/a/7FcXeDU1BTa0SvynqSLezuaQ0?speed=2 42 | 43 | Installation 44 | ------------ 45 | 46 | Install Haskell, and run `cabal install bisect-binary`. You will find the 47 | binary in `~/.cabal/bin/bisect-binary`, and may want to move that to your 48 | `$PATH`. 49 | 50 | Usage 51 | ----- 52 | 53 | Run `bisect-binary` with the input file, the output file, and optionally a 54 | command to run on every try (which could, for example, flash the 55 | microcontroller with the binary): 56 | 57 | ``` 58 | $ bisect-binary --help 59 | Binary file bisector 60 | 61 | Usage: bisect-binary (-i|--input FILE) (-o|--output FILE) [-c|--command COMMAND] 62 | Fills a file with as much zeroes as possible 63 | 64 | Available options: 65 | -h,--help Show this help text 66 | -i,--input FILE input file 67 | -o,--output FILE output file 68 | -c,--command COMMAND command to run 69 | ``` 70 | 71 | Now `bisect-binary` will zero out parts of the file and ask you if the result is 72 | now good or bad, as seen in the screenshot above. Your options are: 73 | 74 | * `Y`: Mark this example as good. It will remember which portions it zeroed out and from now 75 | on only add to it 76 | * `N`: Mark this example as bad. It will revert to the previous good version and try out 77 | other parts of the file. 78 | * `R`: Re-run the command. 79 | * `U`: Undo the last marking. 80 | * `Q`: Quit the program. 81 | 82 | Upon quitting, `bisect-binary` will leave `output` in the last known state. 83 | 84 | A persisent file name `output-file.bisect.log` is kept, so you can quit and 85 | restart at any time without losing your work. 86 | 87 | The status line tells you: 88 | * how much of the file could be zeroed out sucessfully. 89 | * how many bytes it is now adding to that. 90 | * how many bytes of the file may be relevant for your task 91 | * a status bar describing the progress: 92 | 93 | * The top row visualizes the part of the file that we know we can zero out for sure. 94 | * The bottom row visualizes the part that we are zeroing out next. 95 | 96 | Two dots indicate _all_ bytes in this part, one dot indicates _some_ bytes. 97 | 98 | Contact 99 | ------- 100 | 101 | Please reports bugs and missing features at the [GitHub bugtracker]. This is 102 | also where you can find the [source code]. 103 | 104 | `bisect-binary` was written by [Joachim Breitner] and is licensed under a permissive MIT 105 | [license]. 106 | 107 | [GitHub bugtracker]: https://github.com/nomeata/bisect-binary/issues 108 | [source code]: https://github.com/nomeata/bisect-binary 109 | [Joachim Breitner]: http://www.joachim-breitner.de/ 110 | [license]: https://github.com/nomeata/gipeda/blob/LICENSE 111 | 112 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /bisect-binary.cabal: -------------------------------------------------------------------------------- 1 | name: bisect-binary 2 | version: 0.1.0.1 3 | synopsis: Determine relevant parts of binary data 4 | description: 5 | This little program helps with the following task: 6 | . 7 | You need to analyse some binary file (e.g., the firmware for some 8 | microcontroller). You want to understand how it does certain things, but the 9 | file is big. One approach to help you in the process is to erase parts of the file 10 | (by overwriting it with binary zeros) and see if the file still does what it 11 | should be doing. If it does, then you know that the interesting parts are in 12 | another part of the file. 13 | . 14 | `binary-binary` assists in this process by doing the book-keeping and zeroing 15 | out parts of the file. 16 | homepage: http://github.com/nomeata/bisect-binary 17 | license: MIT 18 | license-file: LICENSE 19 | author: Joachim Breitner 20 | maintainer: mail@joachim-breitner.de 21 | copyright: 2017 Joachim Breitner 22 | category: Development 23 | build-type: Simple 24 | extra-source-files: ChangeLog.md, README.md 25 | cabal-version: >=1.10 26 | tested-with: GHC ==8.0.2, GHC ==8.2.2 27 | 28 | executable bisect-binary 29 | main-is: Main.hs 30 | other-modules: Intervals, Braille 31 | other-extensions: LambdaCase, DeriveGeneric 32 | build-depends: base >=4.9 && <4.12 33 | build-depends: bytestring 34 | build-depends: directory 35 | build-depends: filepath 36 | build-depends: optparse-applicative >=0.13 && <0.14 37 | build-depends: yaml >=0.8 && <0.9 38 | build-depends: haskeline >=0.7 && <0.8 39 | build-depends: time 40 | build-depends: hashable 41 | build-depends: integer-logarithms 42 | build-depends: terminal-size 43 | build-depends: process 44 | hs-source-dirs: src 45 | default-language: Haskell2010 46 | ghc-options: -Wall -Wno-name-shadowing 47 | 48 | source-repository head 49 | type: git 50 | location: https://github.com/nomeata/bisect-binary 51 | 52 | -------------------------------------------------------------------------------- /src/Braille.hs: -------------------------------------------------------------------------------- 1 | module Braille where 2 | 3 | import Data.Char 4 | import Data.Bits 5 | 6 | dotsToBrailleBar :: [Int] -> String 7 | dotsToBrailleBar [] = "" 8 | dotsToBrailleBar [x] = [dotsToBrailleChar x 0] 9 | dotsToBrailleBar (x:y:xs) = dotsToBrailleChar x y : dotsToBrailleBar xs 10 | 11 | dotsToBrailleChar :: Int -> Int -> Char 12 | dotsToBrailleChar n m = 13 | chr $ 0x2800 + sum 14 | [ bit 0 * fromEnum (testBit n 3) 15 | , bit 1 * fromEnum (testBit n 2) 16 | , bit 2 * fromEnum (testBit n 1) 17 | , bit 3 * fromEnum (testBit m 3) 18 | , bit 4 * fromEnum (testBit m 2) 19 | , bit 5 * fromEnum (testBit m 1) 20 | , bit 6 * fromEnum (testBit n 0) 21 | , bit 7 * fromEnum (testBit m 0) 22 | ] 23 | -------------------------------------------------------------------------------- /src/Intervals.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE LambdaCase, DeriveGeneric #-} 2 | 3 | -- | 4 | -- This module provides an (efficient?) (compact?) representation of sets of 5 | -- file offsets, together with a few basic operations. 6 | -- 7 | -- The representation is a sorted list of disjoint, non-adjacent intervals. 8 | -- 9 | -- The operations 'isEmpty', 'fullIntervals', 'nullInterval', 'subSetOf', 10 | -- 'union', 'intersection' and 'subtract' were formally proven correct using 11 | -- @hs-to-coq@: 12 | -- 13 | -- (Three bugs were found in the process.) 14 | module Intervals where 15 | 16 | import qualified Data.ByteString.Lazy as BS 17 | import Text.Printf 18 | import Data.List hiding (union, intersect) 19 | import Data.Monoid ((<>)) 20 | import Data.Yaml (ToJSON, FromJSON) 21 | import GHC.Generics (Generic) 22 | import Data.Function 23 | import Data.Int 24 | import Prelude hiding (subtract) 25 | 26 | type Offset = Int64 27 | 28 | data Interval = I { from :: Offset, to :: Offset } 29 | deriving (Show,Generic) 30 | 31 | newtype Intervals = Intervals [Interval] 32 | deriving (Show,Generic) 33 | 34 | mkInterval :: Offset -> Offset -> Intervals 35 | mkInterval f t | f < t = Intervals [I f t] 36 | | otherwise = Intervals [] 37 | 38 | fullIntervals :: Offset -> Intervals 39 | fullIntervals len = mkInterval 0 len 40 | 41 | nullInterval :: Intervals 42 | nullInterval = Intervals [] 43 | 44 | size :: Intervals -> Offset 45 | size (Intervals is) = sum [ t - f | I f t <- is ] 46 | 47 | isEmpty :: Intervals -> Bool 48 | isEmpty (Intervals is) = null is 49 | 50 | subSetOf :: Intervals -> Intervals -> Bool 51 | subSetOf a b = isEmpty (a `subtract` b) 52 | 53 | intersects :: Intervals -> Intervals -> Bool 54 | intersects a b = not $ isEmpty (a `intersect` b) 55 | 56 | intersect :: Intervals -> Intervals -> Intervals 57 | intersect (Intervals is1) (Intervals is2) = Intervals $ go is1 is2 58 | where 59 | go _ [] = [] 60 | go [] _ = [] 61 | go (i1:is1) (i2:is2) 62 | -- reorder for symmetry 63 | | to i1 < to i2 = go (i2:is2) (i1:is1) 64 | -- disjoint 65 | | from i1 >= to i2 = go (i1:is1) is2 66 | -- subset 67 | | to i1 == to i2 = I f' (to i2) : go is1 is2 68 | -- overlapping 69 | | otherwise = I f' (to i2) : go (i1 { from = to i2} : is1) is2 70 | where f' = max (from i1) (from i2) 71 | 72 | 73 | union :: Intervals -> Intervals -> Intervals 74 | union (Intervals is1) (Intervals is2) = Intervals $ go is1 is2 75 | where 76 | go is [] = is 77 | go [] is = is 78 | go (i1:is1) (i2:is2) 79 | -- reorder for symmetry 80 | | to i1 < to i2 = go (i2:is2) (i1:is1) 81 | -- disjoint 82 | | from i1 > to i2 = i2 : go (i1:is1) is2 83 | -- overlapping 84 | | otherwise = go (i1 { from = f'} : is1) is2 85 | where f' = min (from i1) (from i2) 86 | 87 | subtract :: Intervals -> Intervals -> Intervals 88 | subtract (Intervals is1) (Intervals is2) = Intervals $ go is1 is2 89 | where 90 | go is [] = is 91 | go [] _ = [] 92 | go (i1:is1) (i2:is2) 93 | -- i2 past i1 94 | | to i1 <= from i2 = i1 : go is1 (i2:is2) 95 | -- i1 past i2 96 | | to i2 <= from i1 = go (i1:is1) is2 97 | -- i1 contained in i2 98 | | from i2 <= from i1 , to i1 <= to i2 = go is1 (i2:is2) 99 | -- i2 covers beginning of i1 100 | | from i1 >= from i2 = go (i1 { from = to i2} : is1) is2 101 | -- i2 covers end of i1 102 | | to i1 <= to i2 = i1 { to = from i2} : go is1 (i2:is2) 103 | -- i2 in the middle of i1 104 | | otherwise = I (from i1) (from i2) : 105 | go (I (to i2) (to i1) : is1) is2 106 | 107 | 108 | setZeros :: BS.ByteString -> Intervals -> BS.ByteString 109 | setZeros s (Intervals is) = foldl' go s is 110 | where 111 | go s (I f t) = prefix <> zeroes <> postfix 112 | where 113 | (tmp, postfix) = BS.splitAt t s 114 | (prefix, _discard) = BS.splitAt f tmp 115 | zeroes = BS.replicate (t-f) 0 116 | 117 | ppInterval :: Interval -> String 118 | ppInterval (I f t) = printf "0x%04X-0x%04X" f t 119 | 120 | ppIntervals :: Intervals -> String 121 | ppIntervals (Intervals xs) = intercalate " " (map ppInterval xs) 122 | 123 | instance FromJSON Interval 124 | instance ToJSON Interval 125 | instance FromJSON Intervals 126 | instance ToJSON Intervals 127 | -------------------------------------------------------------------------------- /src/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE LambdaCase, DeriveGeneric #-} 2 | 3 | import qualified Data.ByteString.Lazy as BS 4 | import Text.Printf 5 | import System.Directory 6 | import System.Exit 7 | import System.FilePath 8 | import Data.Foldable 9 | import Data.List hiding (union, intersect) 10 | import Options.Applicative 11 | import Control.Monad 12 | import Data.Monoid ((<>)) 13 | import Data.Yaml (decodeFileEither, encodeFile, prettyPrintParseException, ToJSON, FromJSON) 14 | import Data.Bifunctor 15 | import GHC.Generics (Generic) 16 | import System.Console.Haskeline 17 | import Data.Function 18 | import Control.Monad.IO.Class 19 | import Data.Int 20 | import Data.Time 21 | import Data.Hashable 22 | import Math.NumberTheory.Logarithms 23 | import Data.Char 24 | import System.Process 25 | import Prelude hiding (subtract) 26 | import qualified System.Console.Terminal.Size 27 | 28 | import Intervals 29 | import Braille 30 | 31 | -- Core idea: The intervals to try to zero out 32 | 33 | intervalsToTry :: Offset -> [Intervals] 34 | intervalsToTry len = 35 | [ mkInterval b (b+size) 36 | | level <- toZero big 37 | , let size = 2^level 38 | , let shift = if odd (level - big) then size`div`2 else 0 39 | , let upper = len - size - shift 40 | , b <- [upper, upper - size .. 0] 41 | ] 42 | where big = integerLog2 (fromIntegral len) 43 | toZero n = [n,(n-1)..0] 44 | 45 | -- Data storage 46 | 47 | data Log = Log 48 | { lgFileSize :: Offset 49 | , lgFileHash :: Int 50 | , lgLogs :: [LogEntry] 51 | } 52 | deriving (Show,Generic) 53 | 54 | data LogEntry = LogEntry 55 | { leDate :: ZonedTime 56 | , leOk :: Bool 57 | , leZeroed :: Intervals 58 | } 59 | deriving (Show,Generic) 60 | 61 | instance FromJSON Log 62 | instance ToJSON Log 63 | instance FromJSON LogEntry 64 | instance ToJSON LogEntry 65 | 66 | initLog :: BS.ByteString -> Log 67 | initLog input = Log 68 | { lgFileSize = BS.length input 69 | , lgFileHash = hash input 70 | , lgLogs = [] 71 | } 72 | 73 | checkLog :: Log -> BS.ByteString -> InputT IO Log 74 | checkLog log input 75 | | lgFileSize log /= BS.length input = do 76 | outputStrLn $ printf "ERROR: Log input file size was %i, current input file is %i. Aborting" (lgFileSize log) (BS.length input) 77 | liftIO $ exitFailure 78 | | lgFileHash log /= hash input = do 79 | outputStrLn "WARN: Log input file hash differns from actual input file hash." 80 | outputStrLn "Do you want to continue?" 81 | getInputChar "Y/N?" >>= \case 82 | Just 'Y' -> return log 83 | _ -> do 84 | outputStrLn "Goodbye!" 85 | liftIO $ exitFailure 86 | | otherwise = do 87 | outputStrLn $ printf "Loaded log file with %d previous attempts." (length (lgLogs log)) 88 | return log 89 | 90 | -- | Aggregate, minmal knowledge of which parts of the file are needed 91 | -- 92 | data Digest = Digest 93 | { conservative :: Intervals 94 | -- ^ These bits can safely be zeroes 95 | , needed :: [Intervals] 96 | -- ^ At least one byte in each of these is needed. 97 | -- (Invariant: These are disjoint from the conservative ones) 98 | } 99 | deriving (Show,Generic) 100 | 101 | digestLog :: Log -> Digest 102 | digestLog log = Digest conservative needed 103 | where 104 | (okEntries, badEntries) = 105 | bimap (map leZeroed) (map leZeroed) $ 106 | partition leOk (lgLogs log) 107 | 108 | conservative = foldl' union nullInterval okEntries 109 | needed = prune (map (`subtract` conservative) badEntries) 110 | 111 | -- could remove subsumed entries here 112 | prune = id 113 | 114 | -- | It is pointless trying to zero an interval if it is a subset of what we 115 | -- already know can be zeroes, or if any failed case in the past shows that 116 | -- is a subset of this. 117 | pointless :: Intervals -> Digest -> Bool 118 | pointless try digest = 119 | try `subSetOf` conservative digest || 120 | any (`subSetOf` try) (needed digest) 121 | 122 | -- The main code 123 | 124 | work :: FilePath -> FilePath -> Maybe String -> IO () 125 | work inputFP outputFP commandM = runInputT defaultSettings $ do 126 | input <- liftIO $ BS.readFile inputFP 127 | let len = BS.length input 128 | 129 | when (len == 0) $ do 130 | outputStrLn $ printf "%s is empty." inputFP 131 | liftIO $ exitSuccess 132 | 133 | let logFile = outputFP <.> "bisect.log" 134 | 135 | let revert digest = do 136 | statusText (conservative digest) nullInterval True >>= outputStrLn 137 | liftIO $ BS.writeFile outputFP $ setZeros input (conservative digest) 138 | outputStrLn $ printf "Reverted %s to last known good output." outputFP 139 | 140 | run_cmd = liftIO $ for_ commandM $ \cmd -> do 141 | ph <- spawnCommand cmd 142 | waitForProcess ph 143 | 144 | -- A single run 145 | test zeros = do 146 | liftIO $ BS.writeFile outputFP $ setZeros input zeros 147 | run_cmd 148 | 149 | ask log msg = fix $ \loop -> do 150 | getInputChar msg >>= pure . fmap toUpper >>= \case 151 | Just 'Y' -> do 152 | return True 153 | Just 'N' -> do 154 | return False 155 | Just 'Q' -> do 156 | revert (digestLog log) 157 | liftIO $ exitSuccess 158 | Just 'R' -> do 159 | run_cmd 160 | loop 161 | Just 'U' -> do 162 | let log' = log { lgLogs = init (lgLogs log) } 163 | steps log' 164 | -- code smell 165 | liftIO $ exitSuccess 166 | Just 'F' -> do 167 | let log' = log { lgLogs = filter leOk (lgLogs log) } 168 | steps log' 169 | -- code smell 170 | liftIO $ exitSuccess 171 | Just '?' -> do 172 | outputStrLn "Keys: Y: good. N: bad. R: rerun command. U: Undo. F: Forget negatives Q: Quit" 173 | loop 174 | _ -> loop 175 | 176 | statusText :: MonadIO m => Intervals -> Intervals -> Bool -> m String 177 | statusText conservative toTry done = liftIO $ do 178 | w <- getWidth 179 | let barw = w - 57 180 | let zeroPerc = 100 * fromIntegral (size conservative) / fromIntegral len 181 | let nonZeroBytes = len - size conservative 182 | return $ printf "%4.1f%% zeroes %12s %7dB left %s%s" 183 | (zeroPerc :: Double) 184 | (if done then "" else printf "%7dB new" (size toTry)) 185 | nonZeroBytes 186 | (if barw > 5 then braille barw len conservative toTry else "") 187 | (if done then "" else " [YNRUFQ?] ") 188 | 189 | 190 | -- Single step of the main loop 191 | step log toTry 192 | | pointless toTry digest = return log 193 | | otherwise = do 194 | let zeros = conservative digest `union` toTry 195 | test zeros 196 | result <- statusText (conservative digest) toTry False >>= ask log 197 | stamp <- liftIO $ getZonedTime 198 | let entry = LogEntry { leDate = stamp, leOk = result, leZeroed = zeros } 199 | let log' = log { lgLogs = lgLogs log ++ [entry] } 200 | liftIO $ encodeFile logFile log' 201 | return $ log' 202 | where 203 | digest = digestLog log 204 | 205 | lastStep :: Log -> InputT IO () 206 | lastStep log = fix $ \loop -> do 207 | getInputChar "Done! [UFQ?]" >>= pure . fmap toUpper >>= \case 208 | Just 'Q' -> do 209 | revert (digestLog log) 210 | liftIO $ exitSuccess 211 | Just 'U' -> do 212 | let log' = log { lgLogs = init (lgLogs log) } 213 | steps log' 214 | -- code smell 215 | liftIO $ exitSuccess 216 | Just 'F' -> do 217 | let log' = log { lgLogs = filter leOk (lgLogs log) } 218 | steps log' 219 | -- code smell 220 | liftIO $ exitSuccess 221 | Just '?' -> do 222 | outputStrLn "Keys: U: Undo. F: Forget negatives Q: Quit" 223 | loop 224 | _ -> loop 225 | 226 | -- Main loop 227 | steps log = do 228 | log' <- foldM step log (intervalsToTry len) 229 | lastStep log' 230 | -- TODO: What now? 231 | 232 | 233 | -- Initialization 234 | initialLog <- liftIO (doesFileExist logFile) >>= \case 235 | False -> do 236 | outputStrLn $ printf "Cannot find %s, starting from scratch." logFile 237 | return (initLog input) 238 | True -> do 239 | liftIO (decodeFileEither logFile) >>= \case 240 | Left error -> do 241 | outputStrLn $ printf "ERROR: Cannot parse %s:" 242 | outputStrLn $ prettyPrintParseException error 243 | liftIO $ exitFailure 244 | Right log -> do 245 | outputStrLn $ printf "Loading log file %s." logFile 246 | checkLog log input 247 | 248 | steps initialLog 249 | 250 | -- Argument handling 251 | 252 | main :: IO () 253 | main = join . customExecParser (prefs showHelpOnError) $ 254 | info (helper <*> parser) 255 | ( fullDesc 256 | <> header "Binary file bisector" 257 | <> progDesc "Fills a file with as much zeroes as possible" 258 | ) 259 | where 260 | parser :: Parser (IO ()) 261 | parser = 262 | work 263 | <$> strOption 264 | ( long "input" 265 | <> short 'i' 266 | <> metavar "FILE" 267 | <> help "input file" 268 | ) 269 | <*> strOption 270 | ( long "output" 271 | <> short 'o' 272 | <> metavar "FILE" 273 | <> help "output file" 274 | ) 275 | <*> optional (strOption 276 | ( long "command" 277 | <> short 'c' 278 | <> metavar "COMMAND" 279 | <> help "command to run" 280 | )) 281 | 282 | -- Pretty progress bar using braille symbols: 283 | 284 | data Cover = NoCover | SomeCover | FullCover 285 | deriving (Eq, Ord) 286 | 287 | braille :: Int -> Offset -> Intervals -> Intervals -> String 288 | braille width len lower upper = "[" ++ bar ++ "]" 289 | where 290 | bar = dotsToBrailleBar (map (toBits . go) parts) 291 | 292 | parts :: [Intervals] -- (width-2)*2 intervals 293 | parts = [mkInterval n (min (n + step) len) | n <- [0,step..len-1] ] 294 | where step = len `div` fromIntegral (2*(width - 2)) 295 | 296 | descOverlap :: Intervals -> Intervals -> Cover 297 | descOverlap big small 298 | | small `subSetOf` big = FullCover 299 | | small `intersects` big = SomeCover 300 | | otherwise = NoCover 301 | 302 | go :: Intervals -> (Cover, Cover) 303 | go i = (lower `descOverlap` i, upper `descOverlap` i) 304 | 305 | toBits :: (Cover, Cover) -> Int 306 | toBits (c1, c2) = sum 307 | [ 1 * fromEnum (c1 >= SomeCover) 308 | , 2 * fromEnum (c1 >= FullCover) 309 | , 4 * fromEnum (c2 >= FullCover) 310 | , 8 * fromEnum (c2 >= SomeCover) 311 | ] 312 | 313 | 314 | getWidth :: IO Int 315 | getWidth = maybe 80 System.Console.Terminal.Size.width <$> 316 | System.Console.Terminal.Size.size 317 | 318 | --------------------------------------------------------------------------------