├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── delta.go ├── delta_apply.go ├── delta_apply_test.go ├── delta_bytes.go ├── delta_dump.go ├── delta_go_string.go ├── delta_internal.go ├── delta_load.go ├── delu └── main.go ├── experiment_test.go ├── func.go ├── func_test.go ├── go.mod ├── go.sum ├── index_map.go ├── integrated_test.go ├── lorem_ipsum.txt ├── make.go ├── make_test.go └── module.go /.gitignore: -------------------------------------------------------------------------------- 1 | ## ----------------------------------------------------------------------------- 2 | ## github.com/balacode/go-delta go-delta/[.gitignore] 3 | ## (c) balarabe@protonmail.com License: MIT 4 | ## ----------------------------------------------------------------------------- 5 | 6 | ## backups, drafts and temporary files 7 | *.go-bak 8 | *`* 9 | *tmp* 10 | 11 | ## shell scripts 12 | *.bat 13 | 14 | ## executable files 15 | /delu/delu 16 | /delu/delu.exe 17 | 18 | ## log and debugging files 19 | /build.log 20 | /cover.out 21 | /delu/build.log 22 | /run.log 23 | 24 | ## sample files used to generate binary deltas 25 | /day1.data 26 | /day1.zip 27 | /day2.data 28 | /day2.zip 29 | /test1.data 30 | /test1.zip 31 | /test2.data 32 | /test2.zip 33 | 34 | ## end 35 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | sudo: false 4 | 5 | go: 6 | - 1.9.2 7 | - tip 8 | 9 | before_install: 10 | - go get github.com/mattn/goveralls 11 | 12 | script: 13 | - $GOPATH/bin/goveralls -service=travis-ci 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Balarabe 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and 
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## go-delta - A Go package and utility to generate and apply binary delta updates. 
2 | 3 | [![Go Report Card](https://goreportcard.com/badge/github.com/balacode/go-delta)](https://goreportcard.com/report/github.com/balacode/go-delta) 4 | [![Build Status](https://travis-ci.org/balacode/go-delta.svg?branch=master)](https://travis-ci.org/balacode/go-delta) 5 | [![Test Coverage](https://coveralls.io/repos/github/balacode/go-delta/badge.svg?branch=master&service=github)](https://coveralls.io/github/balacode/go-delta?branch=master) 6 | [![Gitter chat](https://badges.gitter.im/balacode/go-delta.png)](https://gitter.im/go-delta/Lobby) 7 | [![godoc](https://godoc.org/github.com/balacode/go-delta?status.svg)](https://godoc.org/github.com/balacode/go-delta) 8 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) 9 | 10 | ## Suggestions: 11 | 12 | - Works best on text files, database dumps and any other files with lots of 13 | repeating patterns and few changes between updates. 14 | 15 | - Generating deltas of compressed files is not recommended because a small 16 | change in the source data can lead to lots of changes in the compressed 17 | result, so generating a delta update may give you only minimal size 18 | reduction. 19 | 20 | - Don't compress bytes returned by Delta.Bytes() because they are already 21 | compressed using ZLib compression. 22 | 23 | - Every delta update adds about 156 bytes for the source and target hashes 24 | and various lengths, so it is not recommended for very minuscule updates. 
25 | 26 | ## Demonstration: 27 | 28 | ```go 29 | package main 30 | 31 | import ( 32 | "fmt" 33 | "github.com/balacode/go-delta" 34 | ) 35 | 36 | func main() { 37 | fmt.Print("Binary delta update demo:\n\n") 38 | 39 | // The original data (50 bytes): 40 | var source = []byte("quick brown fox, lazy dog, and five boxing wizards") 41 | fmt.Print("The original is:", "\n", string(source), "\n\n") 42 | 43 | // The updated data containing the original and new content (82 bytes): 44 | var target = []byte( 45 | "The quick brown fox jumps over the lazy dog. " + 46 | "The five boxing wizards jump quickly.", 47 | ) 48 | fmt.Print("The update is:", "\n", string(target), "\n\n") 49 | 50 | var dbytes []byte 51 | { 52 | // Use Make() to generate a compressed patch from source and target 53 | var d = delta.Make(source, target) 54 | 55 | // Convert the delta to a slice of bytes (e.g. for writing to a file) 56 | dbytes = d.Bytes() 57 | } 58 | 59 | // Create a Delta from the byte slice 60 | var d, loadErr = delta.Load(dbytes) 61 | if loadErr != nil { 62 | fmt.Println(loadErr) 63 | return 64 | } 65 | 66 | // Apply the patch to source to get the target 67 | // The size of the patch is much shorter than target. 
64 | var target2, err = d.Apply(source) 65 | if err != nil { 66 | fmt.Println(err) 67 | } 68 | fmt.Print("Patched:", "\n", string(target2), "\n\n") 69 | } // main 70 | ``` 71 | -------------------------------------------------------------------------------- /delta.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[delta.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | // Delta stores the binary delta difference between two byte arrays 9 | type Delta struct { 10 | sourceSize int // size of the source array 11 | sourceHash []byte // hash of the source byte array 12 | targetSize int // size of the target array 13 | targetHash []byte // hash of the result after this Delta is applied 14 | newCount int // number of chunks not matched in source array 15 | oldCount int // number of matched chunks in source array 16 | parts []deltaPart // array referring to chunks in source array, 17 | // or new bytes to append 18 | } // Delta 19 | 20 | // deltaPart stores references to chunks in the source array, 21 | // or specifies bytes to append to result array directly 22 | type deltaPart struct { 23 | sourceLoc int // byte position of the chunk in source array, 24 | // or -1 when 'data' supplies the bytes directly 25 | // 26 | size int // size of the chunk in bytes 27 | data []byte // optional bytes (only when sourceLoc is -1) 28 | } // deltaPart 29 | 30 | // ----------------------------------------------------------------------------- 31 | // # Read-Only Properties 32 | 33 | // NewCount returns the number of chunks not matched in source array. 34 | func (ob *Delta) NewCount() int { 35 | return ob.newCount 36 | } // NewCount 37 | 38 | // OldCount returns the number of matched chunks in source array. 
39 | func (ob *Delta) OldCount() int { 40 | return ob.oldCount 41 | } // OldCount 42 | 43 | // SourceSize returns the size of the source byte array, in bytes. 44 | func (ob *Delta) SourceSize() int { 45 | return ob.sourceSize 46 | } // SourceSize 47 | 48 | // TargetSize returns the size of the target byte array, in bytes. 49 | func (ob *Delta) TargetSize() int { 50 | return ob.targetSize 51 | } // TargetSize 52 | 53 | // end 54 | -------------------------------------------------------------------------------- /delta_apply.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[delta_apply.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | "bytes" 10 | "fmt" 11 | ) 12 | 13 | // Apply uses the 'source' byte array, applies this 14 | // Delta to it and returns the updated byte array. 15 | // If this delta was not generated from source, 16 | // returns an error. 
17 | func (ob *Delta) Apply(source []byte) ([]byte, error) { 18 | if DebugTiming { 19 | tmr.Start("Delta.Apply") 20 | defer tmr.Stop("Delta.Apply") 21 | } 22 | if len(source) != ob.sourceSize { 23 | return nil, mod.Error(fmt.Sprintf( 24 | "Size of source [%d] does not match expected [%d]", 25 | len(source), ob.sourceSize)) 26 | } 27 | if !bytes.Equal(makeHash(source), ob.sourceHash) { 28 | return nil, mod.Error("Delta does not belong to specified source") 29 | } 30 | buf := bytes.NewBuffer(make([]byte, 0, ob.targetSize)) 31 | for i, pt := range ob.parts { 32 | var data []byte 33 | switch { 34 | case pt.sourceLoc == -1: 35 | data = pt.data 36 | case pt.sourceLoc < 0 || pt.sourceLoc >= ob.sourceSize: 37 | return nil, mod.Error("part", i, "sourceLoc:", pt.sourceLoc, 38 | "out of range 0 -", ob.sourceSize-1) 39 | case pt.sourceLoc+pt.size > ob.sourceSize: 40 | return nil, mod.Error("part", i, "sourceLoc:", pt.sourceLoc, 41 | "+ size:", pt.size, "extends beyond", ob.sourceSize) 42 | default: 43 | data = source[pt.sourceLoc : pt.sourceLoc+pt.size] 44 | } 45 | n, err := buf.Write(data) 46 | if err != nil { 47 | return nil, mod.Error(err) 48 | } 49 | if n != pt.size { 50 | return nil, mod.Error("Wrote", n, "bytes instead of", pt.size) 51 | } 52 | } 53 | ret := buf.Bytes() 54 | if !bytes.Equal(makeHash(ret), ob.targetHash) { 55 | return nil, mod.Error("Result does not match target hash.") 56 | } 57 | return buf.Bytes(), nil 58 | } // Apply 59 | 60 | // end 61 | -------------------------------------------------------------------------------- /delta_apply_test.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[delta_apply_test.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | "bytes" 10 
| "testing" 11 | ) 12 | 13 | // go test --run Test_Delta_Apply_ 14 | func Test_Delta_Apply_(t *testing.T) { 15 | if PrintTestNames { 16 | printTestName() 17 | } 18 | test := func(src []byte, d Delta, expect []byte) { 19 | result, err := d.Apply(src) 20 | if err != nil { 21 | t.Errorf("\n encountered error: %s\n", err) 22 | return 23 | } 24 | if !bytes.Equal(result, expect) { 25 | t.Errorf("\n expect:\n\t%v\n\t'%s'\n result:\n\t%v\n\t'%s'\n", 26 | expect, expect, result, result) 27 | } 28 | } 29 | test( 30 | // source: 31 | nil, 32 | // 33 | // delta: 34 | Delta{ 35 | sourceHash: nil, 36 | targetHash: makeHash(ab("abc")), 37 | parts: []deltaPart{ 38 | {sourceLoc: -1, size: 3, data: ab("abc")}, 39 | }, 40 | }, 41 | // expect: 42 | ab("abc"), 43 | ) 44 | test( 45 | // source: 46 | ab("abc"), 47 | // 48 | // delta: 49 | Delta{ 50 | sourceHash: makeHash(ab("abc")), 51 | sourceSize: 3, 52 | targetHash: makeHash(ab("abc")), 53 | targetSize: 3, 54 | parts: []deltaPart{ 55 | {sourceLoc: -1, size: 3, data: ab("abc")}, 56 | }, 57 | }, 58 | // expect: 59 | ab("abc"), 60 | ) 61 | } // Test_Delta_Apply_ 62 | 63 | // end 64 | -------------------------------------------------------------------------------- /delta_bytes.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[delta_bytes.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | "bytes" 10 | "encoding/binary" 11 | ) 12 | 13 | // Bytes converts the Delta structure to a byte array 14 | // (for serializing to a file, etc.) 
15 | func (ob *Delta) Bytes() []byte { 16 | buf := bytes.NewBuffer(make([]byte, 0, 1024)) 17 | // 18 | writeInt := func(i int) error { 19 | err := binary.Write(buf, binary.BigEndian, int32(i)) 20 | if err != nil { 21 | return mod.Error("writeInt(", i, ") failed:", err) 22 | } 23 | return nil 24 | } 25 | writeBytes := func(data []byte) error { 26 | err := writeInt(len(data)) 27 | if err != nil { 28 | return mod.Error("writeBytes([", len(data), "]) failed @1:", err) 29 | } 30 | var n int 31 | n, err = buf.Write(data) 32 | if err != nil { 33 | return mod.Error("writeBytes([", len(data), "]) failed @2:", err) 34 | } 35 | if n != len(data) { 36 | return mod.Error("writeBytes([", len(data), "]) failed @3:", 37 | "wrote wrong number of bytes:", n) 38 | } 39 | return nil 40 | } 41 | // write the header 42 | writeInt(ob.sourceSize) 43 | writeBytes(ob.sourceHash) 44 | writeInt(ob.targetSize) 45 | writeBytes(ob.targetHash) 46 | writeInt(ob.newCount) 47 | writeInt(ob.oldCount) 48 | writeInt(len(ob.parts)) 49 | // 50 | // write the parts 51 | for _, part := range ob.parts { 52 | writeInt(part.sourceLoc) 53 | if part.sourceLoc == -1 { 54 | writeBytes(part.data) 55 | continue 56 | } 57 | writeInt(part.size) 58 | } 59 | // compress the delta 60 | if DebugInfo { 61 | PL("uncompressed delta length:", len(buf.Bytes())) 62 | } 63 | ret := compressBytes(buf.Bytes()) 64 | if DebugInfo { 65 | PL("compressed delta length:", len(ret)) 66 | } 67 | return ret 68 | } // Bytes 69 | 70 | // end 71 | -------------------------------------------------------------------------------- /delta_dump.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[delta_dump.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | 
"fmt" 10 | ) 11 | 12 | // Dump prints this object to the console in a human-friendly format. 13 | func (ob *Delta) Dump() { 14 | pl := fmt.Println 15 | pl() 16 | pl("sourceHash:", ob.sourceHash) 17 | pl("targetHash:", ob.targetHash) 18 | pl("newCount:", ob.newCount) 19 | pl("oldCount:", ob.oldCount) 20 | pl("len(parts):", len(ob.parts)) 21 | pl() 22 | for i, part := range ob.parts { 23 | pl("part:", i, "sourceLoc:", part.sourceLoc, 24 | "size:", part.size, 25 | "data:", part.data, string(part.data)) 26 | } 27 | } // Dump 28 | 29 | // end 30 | -------------------------------------------------------------------------------- /delta_go_string.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[delta_go_string.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | "bytes" 10 | "fmt" 11 | ) 12 | 13 | // GoString returns a Go-syntax representation of the Delta structure. 14 | // It implements the fmt.GoStringer interface. 
15 | func (ob Delta) GoString() string { 16 | var buf bytes.Buffer 17 | write := func(args ...string) { 18 | for _, s := range args { 19 | buf.WriteString(s) 20 | } 21 | } 22 | str := func(v interface{}) string { 23 | return fmt.Sprintf("%#v", v) 24 | } 25 | write("Delta{", "\n", 26 | "\t", "sourceSize: ", str(ob.sourceSize), ",\n", 27 | "\t", "sourceHash: ", str(ob.sourceHash), ",\n", 28 | "\t", "targetSize: ", str(ob.targetSize), ",\n", 29 | "\t", "targetHash: ", str(ob.targetHash), ",\n", 30 | "\t", "newCount: ", str(ob.newCount), ",\n", 31 | "\t", "oldCount: ", str(ob.oldCount), ",\n", 32 | "\t", "parts: []deltaPart{\n", 33 | ) 34 | for _, pt := range ob.parts { 35 | write("\t\t{", 36 | "sourceLoc: ", str(pt.sourceLoc), ", ", 37 | "size: ", str(pt.size), ", ", 38 | "data: ", str(pt.data), "}\n") 39 | } 40 | write("\t},\n}") 41 | return buf.String() 42 | } // GoString 43 | 44 | // end 45 | -------------------------------------------------------------------------------- /delta_internal.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[delta_internal.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | // write appends binary difference data 9 | func (ob *Delta) write(sourceLoc, size int, data []byte) { 10 | if DebugTiming { 11 | tmr.Start("write") 12 | defer tmr.Stop("write") 13 | } 14 | if DebugInfo && DebugWriteArgs { 15 | PL("write", 16 | "sourceLoc:", sourceLoc, 17 | "size:", size, 18 | "data:", data, string(data)) 19 | } 20 | // argument validations 21 | switch { 22 | case sourceLoc < -1: 23 | mod.Error("sourceLoc:", sourceLoc, " < -1") 24 | return 25 | case sourceLoc == -1 && len(data) == 0: 26 | mod.Error("sourceLoc == -1 && len(data) == 0") 27 | return 28 | case sourceLoc != -1 && 
len(data) != 0: 29 | mod.Error("sourceLoc != -1 && len(data):", len(data), "!= 0") 30 | return 31 | case size < 1: 32 | mod.Error("size:", size, " < 1") 33 | return 34 | } 35 | // if the previous part was embedded directly, append to that part's data 36 | if sourceLoc == -1 { 37 | n := len(ob.parts) 38 | if n > 0 { 39 | last := &ob.parts[n-1] 40 | if last.sourceLoc == -1 { 41 | last.size += len(data) 42 | last.data = append(last.data, data...) 43 | return 44 | } 45 | } 46 | } 47 | // append a new part 48 | var ar []byte 49 | if sourceLoc == -1 { 50 | ar = make([]byte, len(data)) 51 | copy(ar, data) 52 | } 53 | ob.parts = append(ob.parts, 54 | deltaPart{sourceLoc: sourceLoc, size: size, data: ar}) 55 | } // write 56 | 57 | // end 58 | -------------------------------------------------------------------------------- /delta_load.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[delta_load.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | "bytes" 10 | "encoding/binary" 11 | ) 12 | 13 | // Load fills a new Delta structure from a byte 14 | // array previously returned by Delta.Bytes(). 
15 | func Load(data []byte) (Delta, error) { 16 | // 17 | // uncompress the delta 18 | if DebugInfo { 19 | PL("Load: compressed delta length:", len(data)) 20 | } 21 | data = uncompressBytes(data) 22 | if DebugInfo { 23 | PL("Load: uncompressed delta length:", len(data)) 24 | } 25 | buf := bytes.NewBuffer(data) 26 | readInt := func() int { 27 | var i int32 28 | err := binary.Read(buf, binary.BigEndian, &i) 29 | if err != nil { 30 | mod.Error("readInt() failed:", err) 31 | return -1 32 | } 33 | return int(i) 34 | } 35 | readBytes := func() []byte { 36 | var size int32 37 | err := binary.Read(buf, binary.BigEndian, &size) 38 | if err != nil { 39 | mod.Error("readBytes() failed @1:", err) 40 | } 41 | ar := make([]byte, size) 42 | var nread int 43 | nread, err = buf.Read(ar) 44 | if err != nil { 45 | mod.Error("readBytes() failed @2:", err) 46 | } 47 | if nread != int(size) { 48 | mod.Error("readBytes() failed @3: size:", size, "nread:", nread) 49 | } 50 | return ar 51 | } 52 | // read the header 53 | ret := Delta{ 54 | sourceSize: readInt(), 55 | sourceHash: readBytes(), 56 | targetSize: readInt(), 57 | targetHash: readBytes(), 58 | newCount: readInt(), 59 | oldCount: readInt(), 60 | } 61 | // read the parts 62 | count := readInt() 63 | if count < 1 { 64 | return Delta{}, 65 | mod.Error("readBytes() failed @4: invalid number of parts:", count) 66 | } 67 | ret.parts = make([]deltaPart, count) 68 | for i := range ret.parts { 69 | pt := &ret.parts[i] 70 | pt.sourceLoc = readInt() 71 | if pt.sourceLoc == -1 { 72 | pt.data = readBytes() 73 | pt.size = len(pt.data) 74 | continue 75 | } 76 | pt.size = readInt() 77 | } 78 | return ret, nil 79 | } // Load 80 | 81 | // end 82 | -------------------------------------------------------------------------------- /delu/main.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta 
go-delta/deltau/[main.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package main 7 | 8 | import ( 9 | "fmt" 10 | "os" 11 | 12 | "github.com/balacode/go-delta" 13 | ) 14 | 15 | const Usage = `delu - delta update utility 16 | Usage: 17 | 18 | To create a delta update file: 19 | delu make 20 | 21 | To apply a delta update: 22 | delu apply 23 | ` 24 | 25 | var printError = fmt.Println 26 | 27 | func main() { 28 | a := os.Args[1:] 29 | n := len(a) 30 | switch { 31 | case n == 0: 32 | fmt.Println(Usage) 33 | case a[0] == "-help" || a[0] == "--help" || a[0] == "/?": 34 | fmt.Println(Usage) 35 | case n != 4: 36 | printError("You specified the wrong number of parameters!") 37 | fmt.Println(Usage) 38 | case a[0] == "apply": 39 | applyDelta(a[1], a[2], a[3]) // source, delta, target 40 | case a[0] == "make": 41 | makeDelta(a[1], a[2], a[3]) // source, target, delta 42 | } 43 | } // main 44 | 45 | // ----------------------------------------------------------------------------- 46 | // # Helper Functions 47 | 48 | // applyDelta creates 'targetFile' by applying 'deltaFile' to 'sourceFile'. 
49 | func applyDelta(sourceFile, deltaFile, targetFile string) { 50 | // 51 | // make sure the target file does not exist 52 | if fileExists(targetFile) { 53 | printError("Target exists already:", targetFile) 54 | return 55 | } 56 | var err error 57 | // 58 | // read the source file into a byte array 59 | var sourceAr []byte 60 | sourceAr, err = os.ReadFile(sourceFile) 61 | if err != nil { 62 | printError("Failed reading", sourceFile, ":\n", err) 63 | return 64 | } 65 | // read the delta file into a byte array 66 | var deltaAr []byte 67 | deltaAr, err = os.ReadFile(deltaFile) 68 | if err != nil { 69 | printError("Failed reading", deltaFile, ":\n", err) 70 | return 71 | } 72 | // create a Delta from the delta bytes 73 | var d delta.Delta 74 | d, err = delta.Load(deltaAr) 75 | if err != nil { 76 | printError("Failed to apply delta to source:\n", err) 77 | } 78 | // create target data from source and delta 79 | var targetAr []byte 80 | targetAr, err = d.Apply(sourceAr) 81 | if err != nil { 82 | printError("Failed to apply delta to source:\n", err) 83 | } 84 | // save the target 85 | err = os.WriteFile(targetFile, targetAr, 0644) 86 | if err != nil { 87 | printError("Failed saving", targetFile, ":\n", err) 88 | } 89 | } // applyDelta 90 | 91 | // fileExists returns true if the file given by 'path' exists. 92 | func fileExists(path string) bool { 93 | _, err := os.Stat(path) 94 | if err == nil { 95 | return true 96 | } 97 | if os.IsNotExist(err) { 98 | return false 99 | } 100 | printError("Error while checking if", path, "exists:\n", err) 101 | return false 102 | } // fileExists 103 | 104 | // makeDelta creates 'deltaFile', using 'sourceFile' and 'targetFile'. 105 | // The delta file only stores the differences between source and target. 
106 | func makeDelta(sourceFile, targetFile, deltaFile string) { 107 | // 108 | // make sure the delta file does not exist 109 | if fileExists(deltaFile) { 110 | printError("Delta file exists already:", deltaFile) 111 | return 112 | } 113 | var err error 114 | // 115 | // read the source file into a byte array 116 | var sourceAr []byte 117 | sourceAr, err = os.ReadFile(sourceFile) 118 | if err != nil { 119 | printError("Failed reading", sourceFile, ":\n", err) 120 | return 121 | } 122 | // read the target file into a byte array 123 | var targetAr []byte 124 | targetAr, err = os.ReadFile(targetFile) 125 | if err != nil { 126 | printError("Failed reading", targetFile, ":\n", err) 127 | return 128 | } 129 | // create a Delta from the difference between source and target 130 | d := delta.Make(sourceAr, targetAr) 131 | deltaAr := d.Bytes() 132 | // 133 | // save the delta 134 | err = os.WriteFile(deltaFile, deltaAr, 0644) 135 | if err != nil { 136 | printError("Failed saving", deltaFile, ":\n", err) 137 | } 138 | } // makeDelta 139 | 140 | // end 141 | -------------------------------------------------------------------------------- /experiment_test.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[experiment_test.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | // to generate a test coverage report for the whole module use: 9 | // go test -coverprofile cover.out 10 | // go tool cover -html=cover.out 11 | 12 | import ( 13 | "testing" 14 | ) 15 | 16 | const RunExperiments = false 17 | 18 | // ----------------------------------------------------------------------------- 19 | // # Experimental / Auxiliary Tests 20 | 21 | // go test --run Test01 22 | func Test01(t *testing.T) { 23 | if 
!RunExperiments { 24 | return 25 | } 26 | if PrintTestNames { 27 | printTestName() 28 | } 29 | PL("Test01 " + Line) 30 | // 31 | cmap1 := makeMap(readData("test1.zip")) 32 | PL("Created cmap1. len(cmap1):", len(cmap1.m)) 33 | // 34 | cmap2 := makeMap(readData("test2.zip")) 35 | PL("Created cmap2. len(cmap2):", len(cmap2.m)) 36 | // 37 | if false { 38 | const MaxLines = 0 39 | i := 1 40 | for k, v := range cmap1.m { 41 | PL("key:", k, "val:", v) 42 | i++ 43 | if i > MaxLines { 44 | break 45 | } 46 | } 47 | } 48 | if true { 49 | for k, v := range cmap2.m { 50 | _, exist := cmap1.get(k) 51 | PL("key:", k, "val:", v, "exist:", exist) 52 | } 53 | } 54 | } // Test01 55 | 56 | // go test --run Test02 57 | func Test02(t *testing.T) { 58 | if !RunExperiments { 59 | return 60 | } 61 | if PrintTestNames { 62 | printTestName() 63 | } 64 | var a, b []byte 65 | switch 5 { 66 | case 1: 67 | a = ab(AtoM + " " + AtoS + " " + AtoZ) 68 | b = ab("0x0x0x" + AtoZ + " " + AtoZ + " " + AtoZ + " " + Nums) 69 | case 2: 70 | a = ab(AtoM + " " + AtoS + " " + AtoZ) 71 | b = ab(atoz + " " + atoz + " " + atoz + " " + Nums) 72 | case 3: 73 | /* 74 | Target array's size: 16,994,304 bytes 75 | - 76 | Before optimizing makeMap(): 77 | -------------------------------------------------------------- 78 | uncompressed delta length: 1,855,440 bytes 79 | compressed delta length: 704,583 (4.15% of target's size) 80 | elapsed time: 171.4 seconds 81 | -------------------------------------------------------------- 82 | 171.25880: delta.Make 83 | 0.16411: makeHash 84 | 3.78551: makeMap 85 | 165.82172: longestMatch 86 | 0.09878: write 87 | 0.13109: compressBytes 88 | - 89 | After optimizing makeMap(): 90 | -------------------------------------------------------------- 91 | uncompressed delta length: 1,952,772 bytes 92 | compressed delta length: 729,574 (4.29% of target's size) 93 | elapsed time: 2.4 seconds 94 | -------------------------------------------------------------- 95 | 2.40135: delta.Make 96 | 
0.11608: makeHash 97 | 1.28985: makeMap 98 | 0.14999: longestMatch 99 | 0.07882: write 100 | 0.09806: compressBytes 101 | - 102 | After adding backward-scanning in longestMatch() 103 | -------------------------------------------------------------- 104 | uncompressed delta length: 1,675,811 bytes 105 | compressed delta length: 666,880 (3.92% of target's size) 106 | elapsed time: 2.4 seconds 107 | -------------------------------------------------------------- 108 | 2.45898: delta.Make 109 | 0.15910: makeHash 110 | 1.49399: makeMap 111 | 0.16595: longestMatch 112 | 0.07311: write 113 | 0.12408: compressBytes 114 | */ 115 | a = readData("test1.file") 116 | b = readData("test2.file") 117 | PL("loaded data") 118 | case 4: 119 | /* 120 | target size: 10,356,821 121 | uncompressed delta: 5,414,754 122 | compressed delta: 5,258,684 (50.7% of file size) 123 | elapsed time: 6.2 seconds 124 | */ 125 | a = readData("test1.zip") 126 | b = readData("test2.zip") 127 | PL("loaded data") 128 | case 5: 129 | /* 130 | target size: 17,096,704 bytes 131 | uncompressed delta: 64,081 bytes 132 | compressed delta: 25,967 (50.7% of file size) 133 | elapsed time: 2.06 seconds 134 | -------------------------------------------------------------- 135 | 2.06019: delta.Make 136 | 0.11507: makeHash 137 | 1.44146: makeMap 138 | 0.05109: longestMatch 139 | 0.00349: write 140 | 0.00600: compressBytes 141 | 3.67731 142 | */ 143 | a = readData("day1.data") 144 | b = readData("day2.data") 145 | PL("loaded data") 146 | } 147 | if DebugTiming { 148 | tmr.Start("delta.Make") 149 | } 150 | { 151 | d := Make(a, b) 152 | d.Bytes() 153 | } 154 | if DebugTiming { 155 | tmr.Stop("delta.Make") 156 | tmr.Print() 157 | } 158 | } // Test02 159 | 160 | // go test --run Test03 161 | func Test03(t *testing.T) { 162 | if !RunExperiments { 163 | return 164 | } 165 | if PrintTestNames { 166 | printTestName() 167 | } 168 | var a, b []byte 169 | switch 1 { 170 | case 1: 171 | a = ab(AtoM + " " + AtoS + " " + AtoZ) 172 | b = 
ab("000" + AtoZ + " " + AtoZ + " " + AtoZ + " " + Nums) 173 | } 174 | // ------------------------------------------------------------------------- 175 | PL("\n" + Line) 176 | d1 := Make(a, b) 177 | PL("CREATED d1:") 178 | d1.Dump() 179 | // 180 | dbytes := d1.Bytes() 181 | PL("got 'dbytes'") 182 | // ------------------------------------------------------------------------- 183 | PL("\n" + Line) 184 | if DebugTiming { 185 | tmr.Start("Load") 186 | } 187 | d2, err := Load(dbytes) 188 | PL("CREATED d2: err:", err) 189 | d2.Dump() 190 | if DebugTiming { 191 | tmr.Stop("Load") 192 | tmr.Print() 193 | } 194 | } // Test03 195 | 196 | // go test --run Test04 197 | func Test04(t *testing.T) { 198 | if !RunExperiments { 199 | return 200 | } 201 | if PrintTestNames { 202 | printTestName() 203 | } 204 | d := Delta{ 205 | sourceSize: 111, 206 | sourceHash: []byte("SOURCE"), 207 | targetSize: 222, 208 | targetHash: []byte("TARGET"), 209 | newCount: 333, 210 | oldCount: 444, 211 | parts: []deltaPart{ 212 | {}, 213 | {}, 214 | }, 215 | } 216 | PL(d.GoString()) 217 | } // Test04 218 | 219 | // end 220 | -------------------------------------------------------------------------------- /func.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[func.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | "bytes" 10 | "compress/zlib" 11 | "crypto/sha512" 12 | "io" 13 | ) 14 | 15 | // ----------------------------------------------------------------------------- 16 | // # Helper Functions: Compression 17 | 18 | // compressBytes compresses an array of bytes and 19 | // returns the ZLIB-compressed array of bytes. 
20 | func compressBytes(data []byte) []byte { 21 | if DebugTiming { 22 | tmr.Start("compressBytes") 23 | defer tmr.Stop("compressBytes") 24 | } 25 | if len(data) == 0 { 26 | return nil 27 | } 28 | // zip data in standard manner 29 | var b bytes.Buffer 30 | w := zlib.NewWriter(&b) 31 | _, err := w.Write(data) 32 | w.Close() 33 | // 34 | // log any problem 35 | const ERRM = "Failed compressing data with zlib:" 36 | if err != nil { 37 | mod.Error(ERRM, err) 38 | return nil 39 | } 40 | ret := b.Bytes() 41 | if len(ret) < 3 { 42 | mod.Error(ERRM, "length < 3") 43 | return nil 44 | } 45 | return ret 46 | } // compressBytes 47 | 48 | // uncompressBytes uncompresses a ZLIB-compressed array of bytes. 49 | func uncompressBytes(data []byte) []byte { 50 | readCloser, err := zlib.NewReader(bytes.NewReader(data)) 51 | if err != nil { 52 | mod.Error("uncompressBytes:", err) 53 | return nil 54 | } 55 | ret := bytes.NewBuffer(make([]byte, 0, 8192)) 56 | io.Copy(ret, readCloser) 57 | readCloser.Close() 58 | return ret.Bytes() 59 | } // uncompressBytes 60 | 61 | // ----------------------------------------------------------------------------- 62 | // # Helper Functions 63 | 64 | // makeHash returns the SHA-512 hash of byte slice 'data'. 65 | func makeHash(data []byte) []byte { 66 | if DebugTiming { 67 | tmr.Start("makeHash") 68 | defer tmr.Stop("makeHash") 69 | } 70 | if len(data) == 0 { 71 | return nil 72 | } 73 | ret := sha512.Sum512(data) 74 | return ret[:] 75 | } // makeHash 76 | 77 | // readHash returns the SHA-512 hash of the bytes from 'stream'. 
78 | func readHash(stream io.Reader) []byte { 79 | if DebugTiming { 80 | tmr.Start("readHash") 81 | defer tmr.Stop("readHash") 82 | } 83 | hasher := sha512.New() 84 | buf := make([]byte, TempBufferSize) 85 | for first := true; ; first = false { 86 | n, err := stream.Read(buf) 87 | if err == io.EOF && first { 88 | return nil 89 | } 90 | if err == io.EOF { 91 | if n != 0 { 92 | mod.Error("Expected zero: n =", n) 93 | } 94 | break 95 | } 96 | if err != nil { 97 | mod.Error("Failed reading:", err) 98 | return nil 99 | } 100 | if n == 0 { 101 | break 102 | } 103 | n, err = hasher.Write(buf[:n]) 104 | if err != nil { 105 | mod.Error("Failed writing:", err) 106 | return nil 107 | } 108 | } 109 | ret := hasher.Sum(nil) 110 | return ret 111 | } // readHash 112 | 113 | // readLen returns the total size of 'stream' in bytes. 114 | // After a call to readLen, the current reading 115 | // position returns to the start or the stream. 116 | func readLen(stream io.ReadSeeker) int { 117 | ret, _ := stream.Seek(0, io.SeekEnd) 118 | stream.Seek(0, io.SeekStart) 119 | return int(ret) 120 | } // readLen 121 | 122 | // readStream _ _ 123 | func readStream(from io.ReadSeeker, to []byte) (n int64, err error) { 124 | // read from the stream 125 | { 126 | var num int 127 | num, err = from.Read(to) 128 | n = int64(num) 129 | } 130 | if err == io.EOF { 131 | if n != 0 { 132 | mod.Error("Expected zero: n =", n) 133 | } 134 | return -1, nil 135 | } 136 | if err != nil { 137 | return -1, mod.Error("Failed reading:", err) 138 | } 139 | return n, err 140 | } // readStream 141 | 142 | // end 143 | -------------------------------------------------------------------------------- /func_test.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[func_test.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // 
----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | // to generate a test coverage report for the whole module use: 9 | // go test -coverprofile cover.out 10 | // go tool cover -html=cover.out 11 | 12 | import ( 13 | "bytes" 14 | "fmt" 15 | "os" 16 | "runtime" 17 | "strings" 18 | "testing" 19 | ) 20 | 21 | const ( 22 | AtoM = "ABCDEFGHIJKLM" 23 | AtoS = "ABCDEFGHIJKLMNOPQRS" 24 | AtoZ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 25 | Nums = "0123456789" 26 | atoz = "abcdefghijklmnopqrstuvwxyz" 27 | ) 28 | 29 | const PrintTestNames = true 30 | 31 | var Line = strings.Repeat("#", 70) 32 | 33 | // ----------------------------------------------------------------------------- 34 | // # Function Unit Tests 35 | 36 | // go test --run Test_readHash_ 37 | func Test_readHash_(t *testing.T) { 38 | if PrintTestNames { 39 | printTestName() 40 | } 41 | var test = func(input []byte) { 42 | var resultHash []byte 43 | { 44 | buf := bytes.NewBuffer(input) 45 | resultHash = readHash(buf) 46 | } 47 | var expectHash []byte 48 | { 49 | buf := bytes.NewBuffer(input) 50 | expectHash = makeHash(buf.Bytes()) 51 | } 52 | if !bytes.Equal(resultHash, expectHash) { 53 | t.Errorf("\n input:\n\t%v\n%s\n expect:%v\n\t result:\n\t%v\n", 54 | input, string(input), expectHash, resultHash) 55 | } 56 | } 57 | TempBufferSize = 100 58 | test(nil) 59 | test([]byte("abc")) 60 | test([]byte(strings.Repeat("abc", 1024))) 61 | } // Test_readHash_ 62 | 63 | // ----------------------------------------------------------------------------- 64 | // # Test Helper Functions 65 | 66 | // ab converts s to a byte array. 67 | func ab(s string) []byte { 68 | return []byte(s) 69 | } // ab 70 | 71 | // printTestName prints the name of the calling unit test. 
72 | func printTestName() { 73 | if !PrintTestNames { 74 | return 75 | } 76 | funcName := func() string { 77 | var ( 78 | programCounter, _, _, _ = runtime.Caller(2) 79 | ret = runtime.FuncForPC(programCounter).Name() 80 | i = strings.LastIndex(ret, ".") 81 | ) 82 | if i > -1 { 83 | ret = ret[i+1:] 84 | } 85 | ret += "()" 86 | return ret 87 | } 88 | fmt.Println("Running test:", funcName()) 89 | } // printTestName 90 | 91 | // readData reads 'filename' and returns its contents as an array of bytes. 92 | func readData(filename string) []byte { 93 | ret, err := os.ReadFile(filename) 94 | if err != nil { 95 | PL("File reading error:", err) 96 | return nil 97 | } 98 | return ret 99 | } // readData 100 | 101 | // end 102 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[go.mod] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | module github.com/balacode/go-delta 7 | 8 | go 1.16 9 | 10 | require github.com/balacode/zr v1.1.0 11 | 12 | // end 13 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/balacode/zr v1.1.0 h1:3UsFdsRjPd6inBA1Nr9fAVHNJPor0x1K4RYArFmuzWE= 2 | github.com/balacode/zr v1.1.0/go.mod h1:Gek772GtTXR/nDElnIYqB8kHcg3nKhjzTt1yZNUVnmA= 3 | -------------------------------------------------------------------------------- /index_map.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[index_map.go] 3 | // (c) 
balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | const DebugIndex = false 9 | 10 | type chunk [MatchSize]byte 11 | 12 | // indexMap _ _ 13 | type indexMap struct { 14 | m map[chunk][]int 15 | } // indexMap 16 | 17 | // makeMap creates a map of unique chunks in 'data'. 18 | // The key specifies the unique chunk of bytes, while the 19 | // values array returns the positions of the chunk in 'data'. 20 | func makeMap(data []byte) indexMap { 21 | if DebugTiming { 22 | tmr.Start("makeMap") 23 | defer tmr.Stop("makeMap") 24 | } 25 | if DebugIndex { 26 | PL("makeMap init:", len(data), "bytes") 27 | } 28 | lenData := len(data) 29 | if lenData < MatchSize { 30 | return indexMap{m: map[chunk][]int{}} 31 | } 32 | dbgN := 0 33 | ret := indexMap{m: make(map[chunk][]int, lenData/4)} 34 | var key chunk 35 | lenData -= MatchSize 36 | if DebugIndex { 37 | PL("makeMap begin loop") 38 | } 39 | for i := 0; i < lenData; { 40 | copy(key[:], data[i:]) 41 | ar, found := ret.m[key] 42 | if !found { 43 | ret.m[key] = []int{i} 44 | i++ 45 | continue 46 | } 47 | if len(ar) >= MatchLimit { 48 | i++ 49 | continue 50 | } 51 | ret.m[key] = append(ret.m[key], i) 52 | i += MatchSize 53 | if DebugIndex { 54 | dbgN++ 55 | if dbgN < 10e6 { 56 | continue 57 | } 58 | dbgN = 0 59 | PL("i:", i, "len(m):", len(ret.m)) 60 | } 61 | } 62 | return ret 63 | } // makeMap 64 | 65 | // get _ _ 66 | func (ob *indexMap) get(key chunk) (locs []int, found bool) { 67 | locs, found = ob.m[key] 68 | return 69 | } // get 70 | 71 | // end 72 | -------------------------------------------------------------------------------- /integrated_test.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[integrated_test.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | 
// ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | // This unit test checks the functioning of the entire module. 9 | // It calls Make(), Delta.Apply(), Delta.Bytes() and delta.Load(). 10 | 11 | import ( 12 | "bytes" 13 | "testing" 14 | ) 15 | 16 | // go test --run Test_Integrated_ 17 | func Test_Integrated_(t *testing.T) { 18 | if PrintTestNames { 19 | printTestName() 20 | } 21 | vals := [][]byte{ 22 | ab(""), 23 | ab(" "), 24 | ab(AtoZ), 25 | ab(AtoM), 26 | // 27 | ab("start" + Nums), 28 | ab(Nums + "middle" + Nums), 29 | ab(Nums + Nums + "end"), 30 | // 31 | ab( 32 | "Lorem ipsum dolor sit amet, consetetur sadipscing elitr," + 33 | " sed diam nonumy eirmod tempor invidunt ut labore et" + 34 | " dolore magna aliquyam erat, sed diam voluptua. At vero" + 35 | " eos et accusam et justo duo dolores et ea rebum. Stet" + 36 | " clita kasd gubergren, no sea takimata sanctus est Lorem" + 37 | " ipsum dolor sit amet. Lorem ipsum dolor sit amet," + 38 | " consetetur sadipscing elitr, sed diam nonumy eirmod" + 39 | " tempor invidunt ut labore et dolore magna aliquyam erat," + 40 | " sed diam voluptua. At vero eos et accusam et justo duo" + 41 | " dolores et ea rebum. Stet clita kasd gubergren, no sea" + 42 | " takimata sanctus est Lorem ipsum dolor sit amet. Lorem" + 43 | " ipsum dolor sit amet, consetetur sadipscing elitr, sed" + 44 | " diam nonumy eirmod tempor invidunt ut labore et dolore" + 45 | " magna aliquyam erat, sed diam voluptua. At vero eos et" + 46 | " accusam et justo duo dolores et ea rebum. Stet clita" + 47 | " kasd gubergren, no sea takimata sanctus est Lorem ipsum" + 48 | " dolor sit amet. suscipit lobortis nisl ut aliquip ex ea" + 49 | " commodo consequat"), 50 | ab( 51 | "Lorem ipsum dolor sit amet, consetetur sadipscing elitr"), 52 | ab( 53 | " consetetur sadipscing elitr, sed diam nonumy eirmod" + 54 | " magna aliquyam erat, sed diam voluptua. 
At vero eos et"), 55 | ab( 56 | "sit amet, consetetur sadipscing elitr" + 57 | " sed diam nonumy eirmod tempor"), 58 | ab( 59 | "suscipit lobortis nisl ut aliquip ex ea commodo consequat."), 60 | ab( 61 | "Lorem ipsum dolor sit amet, consetetur sadipscing elitr," + 62 | AtoZ + 63 | " sed diam voluptua. At vero eos et accusam et justo duo" + 64 | AtoM + 65 | " commodo consequat"), 66 | } 67 | for _, a := range vals { 68 | for _, b := range vals { 69 | var ar []byte 70 | { 71 | d := Make(a, b) 72 | ar = d.Bytes() 73 | } 74 | var d Delta 75 | var err error 76 | d, err = Load(ar) 77 | if err != nil { 78 | PL("FAILED @1") 79 | PL("SOURCE:", "\n", string(a)) 80 | PL("TARGET:", "\n", string(b)) 81 | PL("ERROR:", err) 82 | continue 83 | } 84 | var result []byte 85 | result, err = d.Apply(a) 86 | if err != nil { 87 | PL("FAILED @2") 88 | PL("SOURCE:", "\n", string(a)) 89 | PL("TARGET:", "\n", string(b)) 90 | PL("ERROR:", err) 91 | continue 92 | } 93 | if !bytes.Equal(result, b) { 94 | PL("FAILED @3") 95 | PL("SOURCE:", "\n", string(a)) 96 | PL("TARGET:", "\n", string(b)) 97 | PL("RETURNED:", "\n", string(result)) 98 | } 99 | } 100 | } 101 | } // Test_Integrated_ 102 | 103 | // end 104 | -------------------------------------------------------------------------------- /lorem_ipsum.txt: -------------------------------------------------------------------------------- 1 | -- ----------------------------------------------------------------------------- 2 | -- github.com/balacode/go-delta go-delta/[lorem_ipsum.txt] 3 | -- (c) balarabe@protonmail.com License: MIT 4 | -- ----------------------------------------------------------------------------- 5 | 6 | abcdefghijklmnopqrstuvwxyz 7 | abcdefghijklmnopqrstuvwxyz 8 | abcdefghijklmnopqrstuvwxyz 9 | 10 | 1234567890 11 | 1234567890 12 | 13 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore 14 | et dolore magna aliquyam erat, sed diam voluptua. 
At vero eos et accusam et justo duo dolores et ea 15 | rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum 16 | dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore 17 | magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet 18 | clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, 19 | consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam 20 | erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd 21 | gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. 22 | 23 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore 24 | eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum 25 | zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer 26 | adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. 27 | 28 | Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex 29 | ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie 30 | consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim 31 | qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. 32 | 33 | Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat 34 | facer possim assum. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh 35 | euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. 
Ut wisi enim ad minim veniam, quis 36 | nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. 37 | 38 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore 39 | eu feugiat nulla facilisis. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd 40 | gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur 41 | sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed 42 | diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea 43 | takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, 44 | At accusam aliquyam diam diam dolore dolores duo eirmod eos erat, et nonumy sed tempor et et invidunt 45 | justo labore Stet clita ea et gubergren, kasd magna no rebum. sanctus sea sed takimata ut vero voluptua. 46 | est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy 47 | eirmod tempor invidunt ut laboreet dolore magna aliquyam erat. 48 | 49 | Consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam 50 | erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd 51 | gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur 52 | sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed 53 | diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea 54 | takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, 55 | sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. 
At 56 | vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus 57 | est Lorem ipsum dolor sit amet. 58 | 59 | -- end 60 | -------------------------------------------------------------------------------- /make.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[make.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | "bytes" 10 | ) 11 | 12 | // Make given two byte arrays 'a' and 'b', calculates the binary 13 | // delta difference between the two arrays and returns it as a Delta. 14 | // You can then use Delta.Apply() to generate 'b' from 'a' the Delta. 15 | func Make(a, b []byte) Delta { 16 | if DebugTiming { 17 | tmr.Start("delta.Make") 18 | defer tmr.Stop("delta.Make") 19 | } 20 | ret := Delta{ 21 | sourceSize: len(a), 22 | sourceHash: makeHash(a), 23 | targetSize: len(b), 24 | targetHash: makeHash(b), 25 | } 26 | lenB := len(b) 27 | if lenB < MatchSize { 28 | ret.parts = []deltaPart{{sourceLoc: -1, size: lenB, data: b}} 29 | return ret 30 | } 31 | cmap := makeMap(a) 32 | var key chunk 33 | tmc := 0 // timing counter 34 | for i := 0; i < lenB; { 35 | if DebugInfo && i-tmc >= 10000 { 36 | PL("delta.Make:", int(100.0/float32(lenB)*float32(i)), "%") 37 | tmc = i 38 | } 39 | if lenB-i < MatchSize { 40 | ret.write(-1, lenB-i, b[i:]) 41 | ret.newCount++ 42 | break 43 | } 44 | var locs []int 45 | found := false 46 | if lenB-i >= MatchSize { 47 | copy(key[:], b[i:]) 48 | locs, found = cmap.get(key) 49 | } 50 | if found { 51 | at, size := longestMatch(a, locs, b, i) 52 | ret.write(at, size, nil) 53 | i += size 54 | ret.oldCount++ 55 | continue 56 | } 57 | ret.write(-1, MatchSize, key[:]) 58 | i += MatchSize 59 | ret.newCount++ 60 | } 61 
| if DebugInfo { 62 | PL("delta.Make: finished writing parts. len(b) = ", len(b)) 63 | } 64 | return ret 65 | } // Make 66 | 67 | // ----------------------------------------------------------------------------- 68 | // # Helper Functions 69 | 70 | // longestMatch is called by Make() to determine the longest 71 | // matching block of bytes between the source array 'a' 72 | // and target array 'b' out of limited choices. 73 | // 74 | // 'bLoc' specifies the position (in 'b') of the chunk to match. 75 | // The MatchSize global constant specifies the length of each 76 | // chunk in bytes, usually 8 bytes. 77 | // 78 | // 'aLocs' is an array of positions (in 'a') at which the chunk is found. 79 | // This array is produced by makeMap() before longestMatch() is called. 80 | // 81 | // Returns the location ('loc') of the match in 'a' 82 | // and the length of the match in 'b' ('size'). 83 | // 84 | func longestMatch(a []byte, aLocs []int, b []byte, bLoc int) (loc, size int) { 85 | if DebugTiming { 86 | tmr.Start("longestMatch") 87 | defer tmr.Stop("longestMatch") 88 | } 89 | if len(aLocs) < 1 { 90 | mod.Error("aLocs is empty") 91 | return -1, -1 92 | } 93 | bEnd := len(b) - 1 94 | if bLoc < 0 || bLoc > bEnd { 95 | mod.Error("bLoc", bLoc, "out of range [0 -", len(b), "]") 96 | return -1, -1 97 | } 98 | var ( 99 | aEnd = len(a) - 1 100 | retLoc = -1 101 | retSize = -1 102 | ) 103 | for _, ai := range aLocs { 104 | n := MatchSize 105 | bi := bLoc 106 | if !bytes.Equal(a[ai:ai+n], b[bi:bi+n]) { 107 | mod.Error("mismatch at ai:", ai, "bi:", bi) 108 | continue 109 | } 110 | /* 111 | DISABLED: EXTENDING MATCH BACKWARD OVERLAPS PREVIOUSLY-WRITTEN PARTS 112 | // extend match backward 113 | for ai-1 >= 0 && bi-1 >= 0 && a[ai-1] == b[bi-1] { 114 | ai-- 115 | bi-- 116 | n++ 117 | } 118 | */ 119 | // extend match forward 120 | for ai+n <= aEnd && bi+n <= bEnd && a[ai+n] == b[bi+n] { 121 | n++ 122 | } 123 | if n > retSize { 124 | retLoc = ai 125 | retSize = n 126 | } 127 | } 128 | 
return retLoc, retSize 129 | } // longestMatch 130 | 131 | // end 132 | -------------------------------------------------------------------------------- /make_test.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[make_test.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | "testing" 10 | ) 11 | 12 | // go test --run Test_Make_ 13 | func Test_Make_(t *testing.T) { 14 | if PrintTestNames { 15 | printTestName() 16 | } 17 | // func Make(a, b []byte) Delta 18 | // 19 | test := func(a, b []byte, expect Delta) { 20 | result := Make(a, b) 21 | if result.GoString() != expect.GoString() { 22 | t.Errorf("\n expect:\n\t%s\n result:\n\t%s\n", 23 | expect.GoString(), result.GoString()) 24 | } 25 | } 26 | test( 27 | ab(AtoZ), 28 | ab(AtoZ), 29 | Delta{ 30 | sourceSize: 26, 31 | sourceHash: makeHash(ab(AtoZ)), 32 | targetSize: 26, 33 | targetHash: makeHash(ab(AtoZ)), 34 | newCount: 0, 35 | oldCount: 1, 36 | parts: []deltaPart{ 37 | {sourceLoc: 0, size: 26, data: nil}, 38 | }, 39 | }, 40 | ) 41 | } // Test_Make_ 42 | 43 | // end 44 | -------------------------------------------------------------------------------- /module.go: -------------------------------------------------------------------------------- 1 | // ----------------------------------------------------------------------------- 2 | // github.com/balacode/go-delta go-delta/[module.go] 3 | // (c) balarabe@protonmail.com License: MIT 4 | // ----------------------------------------------------------------------------- 5 | 6 | package delta 7 | 8 | import ( 9 | "bytes" 10 | "errors" 11 | "fmt" 12 | 13 | "github.com/balacode/zr" 14 | ) 15 | 16 | // ----------------------------------------------------------------------------- 17 | // # 
Module Constants / Variables 18 | 19 | const ( 20 | // MatchLimit specifies the maximum number of positions tracked 21 | // for each unique key in the map of source data. See makeMap(). 22 | MatchLimit = 50 23 | 24 | // MatchSize specifies the size of unique 25 | // chunks being searched for, in bytes. 26 | MatchSize = 9 27 | ) 28 | 29 | var ( 30 | // PL is fmt.Println() but is used only for debugging. 31 | PL = fmt.Println 32 | 33 | // TempBufferSize sets the size of memory buffers for reading files and other 34 | // streams. This memory is not fixed but allocated/released transiently. 35 | TempBufferSize = 32 * 1024 * 1024 // 32 MB 36 | 37 | // tmr is used for timing all methods/functions during tuning. 38 | tmr zr.Timer 39 | ) 40 | 41 | // ----------------------------------------------------------------------------- 42 | // # Debugging Flags 43 | 44 | var ( 45 | // DebugInfo when set, causes printing of messages helpful for debugging. 46 | DebugInfo = false 47 | 48 | // DebugTiming controls timing (benchmarking) of time spent in each function. 49 | DebugTiming = true 50 | 51 | // DebugWriteArgs when set, prints the arguments passed to write() 52 | DebugWriteArgs = false 53 | ) 54 | 55 | // ----------------------------------------------------------------------------- 56 | // # Error Handler 57 | 58 | // SetErrorFunc changes the error-handling function, so that 59 | // all errors in this package will be sent to this handler, 60 | // which is useful for custom logging and mocking during unit tests. 61 | // To restore the default error handler use SetErrorFunc(nil). 
62 | func SetErrorFunc(fn func(args ...interface{}) error) { 63 | if fn == nil { 64 | mod.Error = defaultErrorFunc 65 | return 66 | } 67 | mod.Error = fn 68 | } // SetErrorFunc 69 | 70 | // defaultErrorFunc is the default error 71 | // handling function assigned to mod.Error 72 | func defaultErrorFunc(args ...interface{}) error { 73 | // 74 | // write all args to a message string (add spaces between args) 75 | var buf bytes.Buffer 76 | for i, arg := range args { 77 | if i > 0 { 78 | buf.WriteString(" ") 79 | } 80 | buf.WriteString(fmt.Sprint(arg)) 81 | } 82 | msg := buf.String() 83 | // 84 | // if DebugInfo is on, print the message to the console 85 | if DebugInfo { 86 | fmt.Println("ERROR:\n", msg) 87 | } 88 | // return error based on message 89 | return errors.New(msg) 90 | } // defaultErrorFunc 91 | 92 | // ----------------------------------------------------------------------------- 93 | // # Module Global 94 | 95 | // mod variable though wich mockable functions are called 96 | var mod = thisMod{Error: defaultErrorFunc} 97 | 98 | // thisMod specifies mockable functions 99 | type thisMod struct { 100 | Error func(args ...interface{}) error 101 | } 102 | 103 | // ModReset restores all mocked functions to the original standard functions. 104 | func (ob *thisMod) Reset() { ob.Error = defaultErrorFunc } 105 | 106 | // end 107 | --------------------------------------------------------------------------------