├── .github └── workflows │ └── go.yml ├── .golangci.yml ├── LICENSE ├── README.md ├── gitdiff ├── apply.go ├── apply_binary.go ├── apply_test.go ├── apply_text.go ├── assert_test.go ├── base85.go ├── base85_test.go ├── binary.go ├── binary_test.go ├── file_header.go ├── file_header_test.go ├── format.go ├── format_roundtrip_test.go ├── format_test.go ├── gitdiff.go ├── gitdiff_test.go ├── io.go ├── io_test.go ├── parser.go ├── parser_test.go ├── patch_header.go ├── patch_header_test.go ├── patch_identity.go ├── patch_identity_test.go ├── testdata │ ├── apply │ │ ├── bin.go │ │ ├── bin_fragment_delta_error.src │ │ ├── bin_fragment_delta_error_dst_size.patch │ │ ├── bin_fragment_delta_error_incomplete_add.patch │ │ ├── bin_fragment_delta_error_incomplete_copy.patch │ │ ├── bin_fragment_delta_error_src_size.patch │ │ ├── bin_fragment_delta_modify.out │ │ ├── bin_fragment_delta_modify.patch │ │ ├── bin_fragment_delta_modify.src │ │ ├── bin_fragment_delta_modify_large.out │ │ ├── bin_fragment_delta_modify_large.patch │ │ ├── bin_fragment_delta_modify_large.src │ │ ├── bin_fragment_literal_create.out │ │ ├── bin_fragment_literal_create.patch │ │ ├── bin_fragment_literal_create.src │ │ ├── bin_fragment_literal_modify.out │ │ ├── bin_fragment_literal_modify.patch │ │ ├── bin_fragment_literal_modify.src │ │ ├── file_bin_modify.out │ │ ├── file_bin_modify.patch │ │ ├── file_bin_modify.src │ │ ├── file_mode_change.out │ │ ├── file_mode_change.patch │ │ ├── file_mode_change.src │ │ ├── file_text.src │ │ ├── file_text_delete.out │ │ ├── file_text_delete.patch │ │ ├── file_text_error_partial_delete.patch │ │ ├── file_text_modify.out │ │ ├── file_text_modify.patch │ │ ├── text_fragment_add_end.out │ │ ├── text_fragment_add_end.patch │ │ ├── text_fragment_add_end.src │ │ ├── text_fragment_add_end_noeol.out │ │ ├── text_fragment_add_end_noeol.patch │ │ ├── text_fragment_add_end_noeol.src │ │ ├── text_fragment_add_middle.out │ │ ├── text_fragment_add_middle.patch │ │ ├── 
text_fragment_add_middle.src │ │ ├── text_fragment_add_start.out │ │ ├── text_fragment_add_start.patch │ │ ├── text_fragment_add_start.src │ │ ├── text_fragment_change_end.out │ │ ├── text_fragment_change_end.patch │ │ ├── text_fragment_change_end.src │ │ ├── text_fragment_change_end_eol.out │ │ ├── text_fragment_change_end_eol.patch │ │ ├── text_fragment_change_end_eol.src │ │ ├── text_fragment_change_exact.out │ │ ├── text_fragment_change_exact.patch │ │ ├── text_fragment_change_exact.src │ │ ├── text_fragment_change_middle.out │ │ ├── text_fragment_change_middle.patch │ │ ├── text_fragment_change_middle.src │ │ ├── text_fragment_change_single_noeol.out │ │ ├── text_fragment_change_single_noeol.patch │ │ ├── text_fragment_change_single_noeol.src │ │ ├── text_fragment_change_start.out │ │ ├── text_fragment_change_start.patch │ │ ├── text_fragment_change_start.src │ │ ├── text_fragment_delete_all.out │ │ ├── text_fragment_delete_all.patch │ │ ├── text_fragment_delete_all.src │ │ ├── text_fragment_error.src │ │ ├── text_fragment_error_context_conflict.patch │ │ ├── text_fragment_error_delete_conflict.patch │ │ ├── text_fragment_error_new_file.patch │ │ ├── text_fragment_error_short_src.patch │ │ ├── text_fragment_error_short_src_before.patch │ │ ├── text_fragment_new.out │ │ ├── text_fragment_new.patch │ │ └── text_fragment_new.src │ ├── new_binary_file.patch │ ├── no_files.patch │ ├── one_file.patch │ ├── string │ │ ├── binary_modify.patch │ │ ├── binary_modify_nodata.patch │ │ ├── binary_new.patch │ │ ├── copy.patch │ │ ├── copy_modify.patch │ │ ├── delete.patch │ │ ├── mode.patch │ │ ├── mode_modify.patch │ │ ├── modify.patch │ │ ├── new.patch │ │ ├── new_empty.patch │ │ ├── new_mode.patch │ │ ├── rename.patch │ │ └── rename_modify.patch │ └── two_files.patch ├── text.go └── text_test.go └── go.mod /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | on: 3 | pull_request: 4 | push: 5 | 
branches: [master] 6 | 7 | jobs: 8 | verify: 9 | name: Verify 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Set up Go 1.21 13 | uses: actions/setup-go@v5 14 | with: 15 | go-version: 1.21 16 | 17 | - name: Check out code into the Go module directory 18 | uses: actions/checkout@v4 19 | 20 | - name: Lint 21 | uses: golangci/golangci-lint-action@v7 22 | with: 23 | version: v2.0 24 | 25 | - name: Test 26 | run: go test -v ./... 27 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | run: 4 | tests: false 5 | 6 | linters: 7 | default: none 8 | enable: 9 | - errcheck 10 | - govet 11 | - ineffassign 12 | - misspell 13 | - revive 14 | - unconvert 15 | - unused 16 | settings: 17 | errcheck: 18 | exclude-functions: 19 | - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write 20 | - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString 21 | - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte 22 | - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) 23 | revive: 24 | rules: 25 | - name: context-keys-type 26 | - name: time-naming 27 | - name: var-declaration 28 | - name: unexported-return 29 | - name: errorf 30 | - name: blank-imports 31 | - name: context-as-argument 32 | - name: dot-imports 33 | - name: error-return 34 | - name: error-strings 35 | - name: error-naming 36 | - name: exported 37 | - name: increment-decrement 38 | - name: var-naming 39 | - name: package-comments 40 | - name: range 41 | - name: receiver-naming 42 | - name: indent-error-flow 43 | 44 | formatters: 45 | enable: 46 | - gofmt 47 | - goimports 48 | settings: 49 | goimports: 50 | local-prefixes: 51 | - github.com/bluekeyes/go-gitdiff 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 
| MIT License 2 | 3 | Copyright (c) 2019 Billy Keyes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-gitdiff 2 | 3 | [![PkgGoDev](https://pkg.go.dev/badge/github.com/bluekeyes/go-gitdiff/gitdiff)](https://pkg.go.dev/github.com/bluekeyes/go-gitdiff/gitdiff) [![Go Report Card](https://goreportcard.com/badge/github.com/bluekeyes/go-gitdiff)](https://goreportcard.com/report/github.com/bluekeyes/go-gitdiff) 4 | 5 | A Go library for parsing and applying patches generated by `git diff`, `git 6 | show`, and `git format-patch`. It can also parse and apply unified diffs 7 | generated by the standard GNU `diff` tool. 
8 | 9 | It supports standard line-oriented text patches and Git binary patches, and 10 | aims to parse anything accepted by the `git apply` command. 11 | 12 | ```golang 13 | patch, err := os.Open("changes.patch") 14 | if err != nil { 15 | log.Fatal(err) 16 | } 17 | 18 | // files is a slice of *gitdiff.File describing the files changed in the patch 19 | // preamble is a string of the content of the patch before the first file 20 | files, preamble, err := gitdiff.Parse(patch) 21 | if err != nil { 22 | log.Fatal(err) 23 | } 24 | 25 | code, err := os.Open("code.go") 26 | if err != nil { 27 | log.Fatal(err) 28 | } 29 | 30 | // apply the changes in the patch to a source file 31 | var output bytes.Buffer 32 | if err := gitdiff.Apply(&output, code, files[0]); err != nil { 33 | log.Fatal(err) 34 | } 35 | ``` 36 | 37 | ## Development Status 38 | 39 | The parsing API and types are complete and I expect will remain stable. Version 40 | 0.7.0 introduced a new apply API that may change more in the future to support 41 | non-strict patch application. 42 | 43 | Parsing and strict application are well-covered by unit tests and the library 44 | is used in a production application that parses and applies thousands of 45 | patches every day. However, the space of all possible patches is large, so 46 | there are likely undiscovered bugs. 47 | 48 | The parsing code has also had a modest amount of fuzz testing. 49 | 50 | ## Why another git/unified diff parser? 51 | 52 | [Several][sourcegraph] [packages][sergi] with [similar][waigani] 53 | [functionality][seletskiy] exist, so why did I write another? 54 | 55 | 1. No other packages I found support binary diffs, as generated with the 56 | `--binary` flag. This is the main reason for writing a new package, as the 57 | format is pretty different from line-oriented diffs and is unique to Git. 58 | 59 | 2. 
Most other packages only parse patches, so you need additional code to apply 60 | them (and if applies are supported, it is only for text files.) 61 | 62 | 3. This package aims to accept anything that `git apply` accepts, and closely 63 | follows the logic in [`apply.c`][apply.c]. 64 | 65 | 4. It seemed like a fun project and a way to learn more about Git. 66 | 67 | [sourcegraph]: https://github.com/sourcegraph/go-diff 68 | [sergi]: https://github.com/sergi/go-diff 69 | [waigani]: https://github.com/waigani/diffparser 70 | [seletskiy]: https://github.com/seletskiy/godiff 71 | 72 | [apply.c]: https://github.com/git/git/blob/master/apply.c 73 | 74 | ## Differences From Git 75 | 76 | 1. Certain types of invalid input that are accepted by `git apply` generate 77 | errors. These include: 78 | 79 | - Numbers immediately followed by non-numeric characters 80 | - Trailing characters on a line after valid or expected content 81 | - Malformed file header lines (lines that start with `diff --git`) 82 | 83 | 2. Errors for invalid input are generally more verbose and specific than those 84 | from `git apply`. 85 | 86 | 3. The translation from C to Go may have introduced inconsistencies in the way 87 | Unicode file names are handled; these are bugs, so please report any issues 88 | of this type. 89 | 90 | 4. When reading headers, there is no validation that OIDs present on an `index` 91 | line are shorter than or equal to the maximum hash length, as this requires 92 | knowing if the repository used SHA1 or SHA256 hashes. 93 | 94 | 5. When reading "traditional" patches (those not produced by `git`), prefixes 95 | are not stripped from file names; `git apply` attempts to remove prefixes 96 | that match the current repository directory/prefix. 97 | 98 | 6. 
// Conflict indicates an apply failed due to a conflict between the patch and
// the source content.
//
// To test whether an error was caused by a conflict, pass an empty Conflict
// to errors.Is:
//
//	if errors.Is(err, &Conflict{}) {
//		// handle conflict
//	}
type Conflict struct {
	msg string
}

// Error returns the conflict message prefixed with "conflict: ".
func (c *Conflict) Error() string {
	const prefix = "conflict: "
	return prefix + c.msg
}

// Is implements error matching for Conflict. A *Conflict target with an empty
// message matches every Conflict; otherwise the two messages must be equal.
// Targets of any other type never match.
func (c *Conflict) Is(other error) bool {
	target, ok := other.(*Conflict)
	if !ok {
		return false
	}
	if target.msg == "" {
		return true
	}
	return target.msg == c.msg
}
// ApplyError wraps an error that occurs during patch application with
// additional location information, if it is available.
type ApplyError struct {
	// Line is the one-indexed line number in the source data
	Line int64
	// Fragment is the one-indexed fragment number in the file
	Fragment int
	// FragmentLine is the one-indexed line number in the fragment
	FragmentLine int

	err error
}

// Unwrap returns the wrapped error.
func (e *ApplyError) Unwrap() error {
	return e.err
}

// Error formats the wrapped error; the location fields are not part of the
// message.
func (e *ApplyError) Error() string {
	return fmt.Sprintf("%v", e.err)
}

// Zero-indexed location markers accepted by applyError. Each one is converted
// to the matching one-indexed field of ApplyError.
type (
	lineNum     int
	fragNum     int
	fragLineNum int
)

// applyError creates a new *ApplyError wrapping err or augments the information
// in err with args if it is already an *ApplyError. Returns nil if err is nil.
//
// A bare io.EOF is replaced by io.ErrUnexpectedEOF before wrapping. Arguments
// that are not one of the location marker types are ignored.
func applyError(err error, args ...interface{}) error {
	if err == nil {
		return nil
	}

	wrapped, isApplyErr := err.(*ApplyError)
	if !isApplyErr {
		cause := err
		if cause == io.EOF {
			cause = io.ErrUnexpectedEOF
		}
		wrapped = &ApplyError{err: cause}
	}

	for _, arg := range args {
		if n, ok := arg.(lineNum); ok {
			wrapped.Line = int64(n) + 1
			continue
		}
		if n, ok := arg.(fragNum); ok {
			wrapped.Fragment = int(n) + 1
			continue
		}
		if n, ok := arg.(fragLineNum); ok {
			wrapped.FragmentLine = int(n) + 1
		}
	}
	return wrapped
}
100 | func Apply(dst io.Writer, src io.ReaderAt, f *File) error { 101 | if f.IsBinary { 102 | if len(f.TextFragments) > 0 { 103 | return applyError(errors.New("binary file contains text fragments")) 104 | } 105 | if f.BinaryFragment == nil { 106 | return applyError(errors.New("binary file does not contain a binary fragment")) 107 | } 108 | } else { 109 | if f.BinaryFragment != nil { 110 | return applyError(errors.New("text file contains a binary fragment")) 111 | } 112 | } 113 | 114 | switch { 115 | case f.BinaryFragment != nil: 116 | applier := NewBinaryApplier(dst, src) 117 | if err := applier.ApplyFragment(f.BinaryFragment); err != nil { 118 | return err 119 | } 120 | return applier.Close() 121 | 122 | case len(f.TextFragments) > 0: 123 | frags := make([]*TextFragment, len(f.TextFragments)) 124 | copy(frags, f.TextFragments) 125 | 126 | sort.Slice(frags, func(i, j int) bool { 127 | return frags[i].OldPosition < frags[j].OldPosition 128 | }) 129 | 130 | // TODO(bkeyes): consider merging overlapping fragments 131 | // right now, the application fails if fragments overlap, but it should be 132 | // possible to precompute the result of applying them in order 133 | 134 | applier := NewTextApplier(dst, src) 135 | for i, frag := range frags { 136 | if err := applier.ApplyFragment(frag); err != nil { 137 | return applyError(err, fragNum(i)) 138 | } 139 | } 140 | return applier.Close() 141 | 142 | default: 143 | // nothing to apply, just copy all the data 144 | _, err := copyFrom(dst, src, 0) 145 | return err 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /gitdiff/apply_binary.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | ) 8 | 9 | // BinaryApplier applies binary changes described in a fragment to source data. 10 | // The applier must be closed after use. 
11 | type BinaryApplier struct { 12 | dst io.Writer 13 | src io.ReaderAt 14 | 15 | closed bool 16 | dirty bool 17 | } 18 | 19 | // NewBinaryApplier creates an BinaryApplier that reads data from src and 20 | // writes modified data to dst. 21 | func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier { 22 | a := BinaryApplier{ 23 | dst: dst, 24 | src: src, 25 | } 26 | return &a 27 | } 28 | 29 | // ApplyFragment applies the changes in the fragment f and writes the result to 30 | // dst. ApplyFragment can be called at most once. 31 | // 32 | // If an error occurs while applying, ApplyFragment returns an *ApplyError that 33 | // annotates the error with additional information. If the error is because of 34 | // a conflict between a fragment and the source, the wrapped error will be a 35 | // *Conflict. 36 | func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error { 37 | if f == nil { 38 | return applyError(errors.New("nil fragment")) 39 | } 40 | if a.closed { 41 | return applyError(errApplierClosed) 42 | } 43 | if a.dirty { 44 | return applyError(errApplyInProgress) 45 | } 46 | 47 | // mark an apply as in progress, even if it fails before making changes 48 | a.dirty = true 49 | 50 | switch f.Method { 51 | case BinaryPatchLiteral: 52 | if _, err := a.dst.Write(f.Data); err != nil { 53 | return applyError(err) 54 | } 55 | case BinaryPatchDelta: 56 | if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil { 57 | return applyError(err) 58 | } 59 | default: 60 | return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) 61 | } 62 | return nil 63 | } 64 | 65 | // Close writes any data following the last applied fragment and prevents 66 | // future calls to ApplyFragment. 
// readBinaryDeltaSize reads a variable length size from a delta-encoded binary
// fragment, returning the size and the unused data. Data is encoded as:
//
//	[[1xxxxxxx]...] [0xxxxxxx]
//
// in little-endian order, with 7 bits of the value per byte. If d ends before
// a byte with a clear high bit is seen, the bits read so far are returned
// together with a nil remainder.
func readBinaryDeltaSize(d []byte) (size int64, rest []byte) {
	var shift uint
	for len(d) > 0 {
		b := d[0]
		d = d[1:]

		size |= int64(b&0x7F) << shift
		shift += 7

		// a clear high bit marks the final byte of the size
		if b&0x80 == 0 {
			return size, d
		}
	}
	return size, nil
}
// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary
// fragment, returning the amount of data written and the unused part of the
// fragment. An add operation takes the form:
//
//	[0xxxxxx][[data1]...]
//
// where the lower seven bits of the opcode is the number of data bytes
// following the opcode. See also pack-format.txt in the Git source.
func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) {
	count := int(op)
	if count > len(delta) {
		return 0, delta, errors.New("corrupt binary delta: incomplete add")
	}

	payload, remaining := delta[:count], delta[count:]
	if _, err = w.Write(payload); err != nil {
		// the payload is reported as consumed even when the write fails
		return int64(count), remaining, err
	}
	return int64(count), remaining, nil
}
// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary
// fragment, returning the amount of data written and the unused part of the
// fragment. A copy operation takes the form:
//
//	[1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3]
//
// where the lower seven bits of the opcode determine which non-zero offset and
// size bytes are present in little-endian order: if bit 0 is set, offset1 is
// present, etc. If no offset or size bytes are present, offset is 0 and size
// is 0x10000. See also pack-format.txt in the Git source.
//
// A read that fills the whole copy range is accepted even when src reports
// io.EOF alongside it, which the io.ReaderAt contract permits for ranges that
// end exactly at the end of the source. Short reads are still errors.
func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) {
	const defaultSize = 0x10000

	// unpack assembles a little-endian value from the bytes whose presence is
	// flagged by `bits` consecutive opcode bits beginning at bit `start`. It
	// consumes those bytes from delta and sets err if delta runs out.
	unpack := func(start, bits uint) (v int64) {
		for i := uint(0); i < bits; i++ {
			mask := byte(1 << (i + start))
			if op&mask > 0 {
				if len(delta) == 0 {
					err = errors.New("corrupt binary delta: incomplete copy")
					return
				}
				v |= int64(delta[0]) << (8 * i)
				delta = delta[1:]
			}
		}
		return
	}

	offset := unpack(0, 4)
	size := unpack(4, 3)
	if err != nil {
		return 0, delta, err
	}
	if size == 0 {
		size = defaultSize
	}

	// TODO(bkeyes): consider pooling these buffers
	b := make([]byte, size)
	if read, rerr := src.ReadAt(b, offset); rerr != nil {
		// io.EOF with a completely filled buffer is a successful read
		if rerr != io.EOF || read < len(b) {
			return 0, delta, rerr
		}
	}

	_, err = w.Write(b)
	return size, delta, err
}
getApplyFiles("text_fragment_add_end_noeol")}, 21 | 22 | "changeStart": {Files: getApplyFiles("text_fragment_change_start")}, 23 | "changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")}, 24 | "changeEnd": {Files: getApplyFiles("text_fragment_change_end")}, 25 | "changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")}, 26 | "changeExact": {Files: getApplyFiles("text_fragment_change_exact")}, 27 | "changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")}, 28 | 29 | "errorShortSrcBefore": { 30 | Files: applyFiles{ 31 | Src: "text_fragment_error.src", 32 | Patch: "text_fragment_error_short_src_before.patch", 33 | }, 34 | Err: &Conflict{}, 35 | }, 36 | "errorShortSrc": { 37 | Files: applyFiles{ 38 | Src: "text_fragment_error.src", 39 | Patch: "text_fragment_error_short_src.patch", 40 | }, 41 | Err: &Conflict{}, 42 | }, 43 | "errorContextConflict": { 44 | Files: applyFiles{ 45 | Src: "text_fragment_error.src", 46 | Patch: "text_fragment_error_context_conflict.patch", 47 | }, 48 | Err: &Conflict{}, 49 | }, 50 | "errorDeleteConflict": { 51 | Files: applyFiles{ 52 | Src: "text_fragment_error.src", 53 | Patch: "text_fragment_error_delete_conflict.patch", 54 | }, 55 | Err: &Conflict{}, 56 | }, 57 | "errorNewFile": { 58 | Files: applyFiles{ 59 | Src: "text_fragment_error.src", 60 | Patch: "text_fragment_error_new_file.patch", 61 | }, 62 | Err: &Conflict{}, 63 | }, 64 | } 65 | 66 | for name, test := range tests { 67 | t.Run(name, func(t *testing.T) { 68 | test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 69 | if len(file.TextFragments) != 1 { 70 | t.Fatalf("patch should contain exactly one fragment, but it has %d", len(file.TextFragments)) 71 | } 72 | applier := NewTextApplier(dst, src) 73 | return applier.ApplyFragment(file.TextFragments[0]) 74 | }) 75 | }) 76 | } 77 | } 78 | 79 | func TestApplyBinaryFragment(t *testing.T) { 80 | tests := map[string]applyTest{ 81 | "literalCreate": {Files: 
getApplyFiles("bin_fragment_literal_create")}, 82 | "literalModify": {Files: getApplyFiles("bin_fragment_literal_modify")}, 83 | "deltaModify": {Files: getApplyFiles("bin_fragment_delta_modify")}, 84 | "deltaModifyLarge": {Files: getApplyFiles("bin_fragment_delta_modify_large")}, 85 | 86 | "errorIncompleteAdd": { 87 | Files: applyFiles{ 88 | Src: "bin_fragment_delta_error.src", 89 | Patch: "bin_fragment_delta_error_incomplete_add.patch", 90 | }, 91 | Err: "incomplete add", 92 | }, 93 | "errorIncompleteCopy": { 94 | Files: applyFiles{ 95 | Src: "bin_fragment_delta_error.src", 96 | Patch: "bin_fragment_delta_error_incomplete_copy.patch", 97 | }, 98 | Err: "incomplete copy", 99 | }, 100 | "errorSrcSize": { 101 | Files: applyFiles{ 102 | Src: "bin_fragment_delta_error.src", 103 | Patch: "bin_fragment_delta_error_src_size.patch", 104 | }, 105 | Err: &Conflict{}, 106 | }, 107 | "errorDstSize": { 108 | Files: applyFiles{ 109 | Src: "bin_fragment_delta_error.src", 110 | Patch: "bin_fragment_delta_error_dst_size.patch", 111 | }, 112 | Err: "insufficient or extra data", 113 | }, 114 | } 115 | 116 | for name, test := range tests { 117 | t.Run(name, func(t *testing.T) { 118 | test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 119 | applier := NewBinaryApplier(dst, src) 120 | return applier.ApplyFragment(file.BinaryFragment) 121 | }) 122 | }) 123 | } 124 | } 125 | 126 | func TestApplyFile(t *testing.T) { 127 | tests := map[string]applyTest{ 128 | "textModify": { 129 | Files: applyFiles{ 130 | Src: "file_text.src", 131 | Patch: "file_text_modify.patch", 132 | Out: "file_text_modify.out", 133 | }, 134 | }, 135 | "textDelete": { 136 | Files: applyFiles{ 137 | Src: "file_text.src", 138 | Patch: "file_text_delete.patch", 139 | Out: "file_text_delete.out", 140 | }, 141 | }, 142 | "textErrorPartialDelete": { 143 | Files: applyFiles{ 144 | Src: "file_text.src", 145 | Patch: "file_text_error_partial_delete.patch", 146 | }, 147 | Err: &Conflict{}, 148 | }, 149 | 
// applyFiles names the fixture files used by one apply test case. An empty
// name means the case has no file of that kind.
type applyFiles struct {
	Src   string
	Patch string
	Out   string
}

// getApplyFiles returns the fixture names for a case whose source, patch, and
// expected output files share the base name and differ only by extension.
func getApplyFiles(name string) applyFiles {
	var files applyFiles
	files.Src = name + ".src"
	files.Patch = name + ".patch"
	files.Out = name + ".out"
	return files
}
"apply", name)) 219 | if err != nil { 220 | t.Fatalf("failed to read %s file: %v", kind, err) 221 | } 222 | return d 223 | } 224 | 225 | if f.Src != "" { 226 | src = load(f.Src, "source") 227 | } 228 | if f.Patch != "" { 229 | patch = load(f.Patch, "patch") 230 | } 231 | if f.Out != "" { 232 | out = load(f.Out, "output") 233 | } 234 | return 235 | } 236 | -------------------------------------------------------------------------------- /gitdiff/apply_text.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | ) 7 | 8 | // TextApplier applies changes described in text fragments to source data. If 9 | // changes are described in multiple fragments, those fragments must be applied 10 | // in order. The applier must be closed after use. 11 | // 12 | // By default, TextApplier operates in "strict" mode, where fragment content 13 | // and positions must exactly match those of the source. 14 | type TextApplier struct { 15 | dst io.Writer 16 | src io.ReaderAt 17 | lineSrc LineReaderAt 18 | nextLine int64 19 | 20 | closed bool 21 | dirty bool 22 | } 23 | 24 | // NewTextApplier creates a TextApplier that reads data from src and writes 25 | // modified data to dst. If src implements LineReaderAt, it is used directly. 26 | func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier { 27 | a := TextApplier{ 28 | dst: dst, 29 | src: src, 30 | } 31 | 32 | if lineSrc, ok := src.(LineReaderAt); ok { 33 | a.lineSrc = lineSrc 34 | } else { 35 | a.lineSrc = &lineReaderAt{r: src} 36 | } 37 | 38 | return &a 39 | } 40 | 41 | // ApplyFragment applies the changes in the fragment f, writing unwritten data 42 | // before the start of the fragment and any changes from the fragment. If 43 | // multiple text fragments apply to the same content, ApplyFragment must be 44 | // called in order of increasing start position. As a result, each fragment can 45 | // be applied at most once. 
46 | // 47 | // If an error occurs while applying, ApplyFragment returns an *ApplyError that 48 | // annotates the error with additional information. If the error is because of 49 | // a conflict between the fragment and the source, the wrapped error will be a 50 | // *Conflict. 51 | func (a *TextApplier) ApplyFragment(f *TextFragment) error { 52 | if a.closed { 53 | return applyError(errApplierClosed) 54 | } 55 | 56 | // mark an apply as in progress, even if it fails before making changes 57 | a.dirty = true 58 | 59 | // application code assumes fragment fields are consistent 60 | if err := f.Validate(); err != nil { 61 | return applyError(err) 62 | } 63 | 64 | // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) 65 | fragStart := f.OldPosition - 1 66 | if fragStart < 0 { 67 | fragStart = 0 68 | } 69 | fragEnd := fragStart + f.OldLines 70 | 71 | start := a.nextLine 72 | if fragStart < start { 73 | return applyError(&Conflict{"fragment overlaps with an applied fragment"}) 74 | } 75 | 76 | if f.OldPosition == 0 { 77 | ok, err := isLen(a.src, 0) 78 | if err != nil { 79 | return applyError(err) 80 | } 81 | if !ok { 82 | return applyError(&Conflict{"cannot create new file from non-empty src"}) 83 | } 84 | } 85 | 86 | preimage := make([][]byte, fragEnd-start) 87 | n, err := a.lineSrc.ReadLinesAt(preimage, start) 88 | if err != nil { 89 | // an EOF indicates that source file is shorter than the patch expects, 90 | // which should be reported as a conflict rather than a generic error 91 | if errors.Is(err, io.EOF) { 92 | err = &Conflict{"src has fewer lines than required by fragment"} 93 | } 94 | return applyError(err, lineNum(start+int64(n))) 95 | } 96 | 97 | // copy leading data before the fragment starts 98 | for i, line := range preimage[:fragStart-start] { 99 | if _, err := a.dst.Write(line); err != nil { 100 | a.nextLine = start + int64(i) 101 | return applyError(err, lineNum(a.nextLine)) 102 | } 103 | } 104 | preimage = 
preimage[fragStart-start:] 105 | 106 | // apply the changes in the fragment 107 | used := int64(0) 108 | for i, line := range f.Lines { 109 | if err := applyTextLine(a.dst, line, preimage, used); err != nil { 110 | a.nextLine = fragStart + used 111 | return applyError(err, lineNum(a.nextLine), fragLineNum(i)) 112 | } 113 | if line.Old() { 114 | used++ 115 | } 116 | } 117 | a.nextLine = fragStart + used 118 | 119 | // new position of +0,0 mean a full delete, so check for leftovers 120 | if f.NewPosition == 0 && f.NewLines == 0 { 121 | var b [1][]byte 122 | n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) 123 | if err != nil && err != io.EOF { 124 | return applyError(err, lineNum(a.nextLine)) 125 | } 126 | if n > 0 { 127 | return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) 128 | } 129 | } 130 | 131 | return nil 132 | } 133 | 134 | func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { 135 | if line.Old() && string(preimage[i]) != line.Line { 136 | return &Conflict{"fragment line does not match src line"} 137 | } 138 | if line.New() { 139 | _, err = io.WriteString(dst, line.Line) 140 | } 141 | return err 142 | } 143 | 144 | // Close writes any data following the last applied fragment and prevents 145 | // future calls to ApplyFragment. 
146 | func (a *TextApplier) Close() (err error) { 147 | if a.closed { 148 | return nil 149 | } 150 | 151 | a.closed = true 152 | if !a.dirty { 153 | _, err = copyFrom(a.dst, a.src, 0) 154 | } else { 155 | _, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine) 156 | } 157 | return err 158 | } 159 | -------------------------------------------------------------------------------- /gitdiff/assert_test.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "errors" 5 | "strings" 6 | "testing" 7 | ) 8 | 9 | func assertError(t *testing.T, expected interface{}, actual error, action string) { 10 | if actual == nil { 11 | t.Fatalf("expected error %s, but got nil", action) 12 | } 13 | 14 | switch exp := expected.(type) { 15 | case bool: 16 | if !exp { 17 | t.Fatalf("unexpected error %s: %v", action, actual) 18 | } 19 | case string: 20 | if !strings.Contains(actual.Error(), exp) { 21 | t.Fatalf("incorrect error %s: %q does not contain %q", action, actual.Error(), exp) 22 | } 23 | case error: 24 | if !errors.Is(actual, exp) { 25 | t.Fatalf("incorrect error %s: expected %T (%v), actual: %T (%v)", action, exp, exp, actual, actual) 26 | } 27 | default: 28 | t.Fatalf("unsupported expected error type: %T", exp) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /gitdiff/base85.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | var ( 8 | b85Table map[byte]byte 9 | b85Alpha = []byte( 10 | "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "!#$%&()*+-;<=>?@^_`{|}~", 11 | ) 12 | ) 13 | 14 | func init() { 15 | b85Table = make(map[byte]byte) 16 | for i, c := range b85Alpha { 17 | b85Table[c] = byte(i) 18 | } 19 | } 20 | 21 | // base85Decode decodes Base85-encoded data from src into dst. 
It uses the 22 | // alphabet defined by base85.c in the Git source tree. src must contain at 23 | // least len(dst) bytes of encoded data. 24 | func base85Decode(dst, src []byte) error { 25 | var v uint32 26 | var n, ndst int 27 | for i, b := range src { 28 | if b, ok := b85Table[b]; ok { 29 | v = 85*v + uint32(b) 30 | n++ 31 | } else { 32 | return fmt.Errorf("invalid base85 byte at index %d: 0x%X", i, src[i]) 33 | } 34 | if n == 5 { 35 | rem := len(dst) - ndst 36 | for j := 0; j < 4 && j < rem; j++ { 37 | dst[ndst] = byte(v >> 24) 38 | ndst++ 39 | v <<= 8 40 | } 41 | v = 0 42 | n = 0 43 | } 44 | } 45 | if n > 0 { 46 | return fmt.Errorf("base85 data terminated by underpadded sequence") 47 | } 48 | if ndst < len(dst) { 49 | return fmt.Errorf("base85 data underrun: %d < %d", ndst, len(dst)) 50 | } 51 | return nil 52 | } 53 | 54 | // base85Encode encodes src in Base85, writing the result to dst. It uses the 55 | // alphabet defined by base85.c in the Git source tree. 56 | func base85Encode(dst, src []byte) { 57 | var di, si int 58 | 59 | encode := func(v uint32) { 60 | dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] 61 | dst[di+1] = b85Alpha[(v/(85*85*85))%85] 62 | dst[di+2] = b85Alpha[(v/(85*85))%85] 63 | dst[di+3] = b85Alpha[(v/85)%85] 64 | dst[di+4] = b85Alpha[v%85] 65 | } 66 | 67 | n := (len(src) / 4) * 4 68 | for si < n { 69 | encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) 70 | si += 4 71 | di += 5 72 | } 73 | 74 | var v uint32 75 | switch len(src) - si { 76 | case 3: 77 | v |= uint32(src[si+2]) << 8 78 | fallthrough 79 | case 2: 80 | v |= uint32(src[si+1]) << 16 81 | fallthrough 82 | case 1: 83 | v |= uint32(src[si+0]) << 24 84 | encode(v) 85 | } 86 | } 87 | 88 | // base85Len returns the length of n bytes of Base85 encoded data. 
// base85Len returns the length of n bytes of Base85 encoded data: every group
// of up to 4 input bytes becomes 5 output characters.
func base85Len(n int) int {
	const (
		rawGroup     = 4
		encodedGroup = 5
	)
	groups := (n + rawGroup - 1) / rawGroup
	return groups * encodedGroup
}
0xFE, 0xCA, 0xFE}, 82 | Output: "007GV%KiWV", 83 | }, 84 | } 85 | 86 | for name, test := range tests { 87 | t.Run(name, func(t *testing.T) { 88 | dst := make([]byte, len(test.Output)) 89 | base85Encode(dst, test.Input) 90 | for i, b := range test.Output { 91 | if dst[i] != byte(b) { 92 | t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) 93 | } 94 | } 95 | }) 96 | } 97 | } 98 | 99 | func FuzzBase85Roundtrip(f *testing.F) { 100 | f.Add([]byte{0x2b, 0x0d}) 101 | f.Add([]byte{0xbc, 0xb4, 0x3f}) 102 | f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) 103 | f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) 104 | 105 | f.Fuzz(func(t *testing.T, in []byte) { 106 | n := len(in) 107 | dst := make([]byte, base85Len(n)) 108 | out := make([]byte, n) 109 | 110 | base85Encode(dst, in) 111 | if err := base85Decode(out, dst); err != nil { 112 | t.Fatalf("unexpected error decoding base85 data: %v", err) 113 | } 114 | if !bytes.Equal(in, out) { 115 | t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) 116 | } 117 | }) 118 | } 119 | -------------------------------------------------------------------------------- /gitdiff/binary.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "bytes" 5 | "compress/zlib" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "strconv" 10 | "strings" 11 | ) 12 | 13 | func (p *parser) ParseBinaryFragments(f *File) (n int, err error) { 14 | isBinary, hasData, err := p.ParseBinaryMarker() 15 | if err != nil || !isBinary { 16 | return 0, err 17 | } 18 | 19 | f.IsBinary = true 20 | if !hasData { 21 | return 0, nil 22 | } 23 | 24 | forward, err := p.ParseBinaryFragmentHeader() 25 | if err != nil { 26 | return 0, err 27 | } 28 | if forward == nil { 29 | return 0, p.Errorf(0, "missing data for binary 
patch") 30 | } 31 | if err := p.ParseBinaryChunk(forward); err != nil { 32 | return 0, err 33 | } 34 | f.BinaryFragment = forward 35 | 36 | // valid for reverse to not exist, but it must be valid if present 37 | reverse, err := p.ParseBinaryFragmentHeader() 38 | if err != nil { 39 | return 1, err 40 | } 41 | if reverse == nil { 42 | return 1, nil 43 | } 44 | if err := p.ParseBinaryChunk(reverse); err != nil { 45 | return 1, err 46 | } 47 | f.ReverseBinaryFragment = reverse 48 | 49 | return 1, nil 50 | } 51 | 52 | func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { 53 | line := p.Line(0) 54 | switch { 55 | case line == "GIT binary patch\n": 56 | hasData = true 57 | case isBinaryNoDataMarker(line): 58 | default: 59 | return false, false, nil 60 | } 61 | 62 | if err = p.Next(); err != nil && err != io.EOF { 63 | return false, false, err 64 | } 65 | return true, hasData, nil 66 | } 67 | 68 | func isBinaryNoDataMarker(line string) bool { 69 | if strings.HasSuffix(line, " differ\n") { 70 | return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") 71 | } 72 | return false 73 | } 74 | 75 | func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) { 76 | parts := strings.SplitN(strings.TrimSuffix(p.Line(0), "\n"), " ", 2) 77 | if len(parts) < 2 { 78 | return nil, nil 79 | } 80 | 81 | frag := &BinaryFragment{} 82 | switch parts[0] { 83 | case "delta": 84 | frag.Method = BinaryPatchDelta 85 | case "literal": 86 | frag.Method = BinaryPatchLiteral 87 | default: 88 | return nil, nil 89 | } 90 | 91 | var err error 92 | if frag.Size, err = strconv.ParseInt(parts[1], 10, 64); err != nil { 93 | nerr := err.(*strconv.NumError) 94 | return nil, p.Errorf(0, "binary patch: invalid size: %v", nerr.Err) 95 | } 96 | 97 | if err := p.Next(); err != nil && err != io.EOF { 98 | return nil, err 99 | } 100 | return frag, nil 101 | } 102 | 103 | func (p *parser) ParseBinaryChunk(frag *BinaryFragment) error { 104 | // Binary 
fragments are encoded as a series of base85 encoded lines. Each 105 | // line starts with a character in [A-Za-z] giving the number of bytes on 106 | // the line, where A = 1 and z = 52, and ends with a newline character. 107 | // 108 | // The base85 encoding means each line is a multiple of 5 characters + 2 109 | // additional characters for the length byte and the newline. The fragment 110 | // ends with a blank line. 111 | const ( 112 | shortestValidLine = "A00000\n" 113 | maxBytesPerLine = 52 114 | ) 115 | 116 | var data bytes.Buffer 117 | buf := make([]byte, maxBytesPerLine) 118 | for { 119 | line := p.Line(0) 120 | if line == "\n" { 121 | break 122 | } 123 | if len(line) < len(shortestValidLine) || (len(line)-2)%5 != 0 { 124 | return p.Errorf(0, "binary patch: corrupt data line") 125 | } 126 | 127 | byteCount, seq := int(line[0]), line[1:len(line)-1] 128 | switch { 129 | case 'A' <= byteCount && byteCount <= 'Z': 130 | byteCount = byteCount - 'A' + 1 131 | case 'a' <= byteCount && byteCount <= 'z': 132 | byteCount = byteCount - 'a' + 27 133 | default: 134 | return p.Errorf(0, "binary patch: invalid length byte") 135 | } 136 | 137 | // base85 encodes every 4 bytes into 5 characters, with up to 3 bytes of end padding 138 | maxByteCount := len(seq) / 5 * 4 139 | if byteCount > maxByteCount || byteCount < maxByteCount-3 { 140 | return p.Errorf(0, "binary patch: incorrect byte count") 141 | } 142 | 143 | if err := base85Decode(buf[:byteCount], []byte(seq)); err != nil { 144 | return p.Errorf(0, "binary patch: %v", err) 145 | } 146 | data.Write(buf[:byteCount]) 147 | 148 | if err := p.Next(); err != nil { 149 | if err == io.EOF { 150 | return p.Errorf(0, "binary patch: unexpected EOF") 151 | } 152 | return err 153 | } 154 | } 155 | 156 | if err := inflateBinaryChunk(frag, &data); err != nil { 157 | return p.Errorf(0, "binary patch: %v", err) 158 | } 159 | 160 | // consume the empty line that ended the fragment 161 | if err := p.Next(); err != nil && err != io.EOF { 
162 | return err 163 | } 164 | return nil 165 | } 166 | 167 | func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) error { 168 | zr, err := zlib.NewReader(r) 169 | if err != nil { 170 | return err 171 | } 172 | 173 | data, err := ioutil.ReadAll(zr) 174 | if err != nil { 175 | return err 176 | } 177 | if err := zr.Close(); err != nil { 178 | return err 179 | } 180 | 181 | if int64(len(data)) != frag.Size { 182 | return fmt.Errorf("%d byte fragment inflated to %d", frag.Size, len(data)) 183 | } 184 | frag.Data = data 185 | return nil 186 | } 187 | -------------------------------------------------------------------------------- /gitdiff/binary_test.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | "reflect" 7 | "strings" 8 | "testing" 9 | ) 10 | 11 | func TestParseBinaryMarker(t *testing.T) { 12 | tests := map[string]struct { 13 | Input string 14 | IsBinary bool 15 | HasData bool 16 | Err bool 17 | }{ 18 | "binaryPatch": { 19 | Input: "GIT binary patch\n", 20 | IsBinary: true, 21 | HasData: true, 22 | }, 23 | "binaryFileNoPatch": { 24 | Input: "Binary files differ\n", 25 | IsBinary: true, 26 | HasData: false, 27 | }, 28 | "binaryFileNoPatchPaths": { 29 | Input: "Binary files a/foo.bin and b/foo.bin differ\n", 30 | IsBinary: true, 31 | HasData: false, 32 | }, 33 | "fileNoPatch": { 34 | Input: "Files differ\n", 35 | IsBinary: true, 36 | HasData: false, 37 | }, 38 | "textFile": { 39 | Input: "@@ -10,14 +22,31 @@\n", 40 | IsBinary: false, 41 | HasData: false, 42 | }, 43 | } 44 | 45 | for name, test := range tests { 46 | t.Run(name, func(t *testing.T) { 47 | p := newTestParser(test.Input, true) 48 | 49 | isBinary, hasData, err := p.ParseBinaryMarker() 50 | if test.Err { 51 | if err == nil || err == io.EOF { 52 | t.Fatalf("expected error parsing binary marker, but got %v", err) 53 | } 54 | return 55 | } 56 | if err != nil { 57 | t.Fatalf("unexpected error parsing 
binary marker: %v", err) 58 | } 59 | if test.IsBinary != isBinary { 60 | t.Errorf("incorrect isBinary value: expected %t, actual %t", test.IsBinary, isBinary) 61 | } 62 | if test.HasData != hasData { 63 | t.Errorf("incorrect hasData value: expected %t, actual %t", test.HasData, hasData) 64 | } 65 | }) 66 | } 67 | } 68 | 69 | func TestParseBinaryFragmentHeader(t *testing.T) { 70 | tests := map[string]struct { 71 | Input string 72 | Output *BinaryFragment 73 | Err bool 74 | }{ 75 | "delta": { 76 | Input: "delta 1234\n", 77 | Output: &BinaryFragment{ 78 | Method: BinaryPatchDelta, 79 | Size: 1234, 80 | }, 81 | }, 82 | "literal": { 83 | Input: "literal 1234\n", 84 | Output: &BinaryFragment{ 85 | Method: BinaryPatchLiteral, 86 | Size: 1234, 87 | }, 88 | }, 89 | "unknownMethod": { 90 | Input: "compressed 1234\n", 91 | Output: nil, 92 | }, 93 | "notAHeader": { 94 | Input: "Binary files differ\n", 95 | Output: nil, 96 | }, 97 | "invalidSize": { 98 | Input: "delta 123abc\n", 99 | Err: true, 100 | }, 101 | } 102 | 103 | for name, test := range tests { 104 | t.Run(name, func(t *testing.T) { 105 | p := newTestParser(test.Input, true) 106 | 107 | frag, err := p.ParseBinaryFragmentHeader() 108 | if test.Err { 109 | if err == nil || err == io.EOF { 110 | t.Fatalf("expected error parsing binary header, but got %v", err) 111 | } 112 | return 113 | } 114 | if err != nil { 115 | t.Fatalf("unexpected error parsing binary header: %v", err) 116 | } 117 | if !reflect.DeepEqual(test.Output, frag) { 118 | t.Errorf("incorrect binary fragment\nexpected: %+v\n actual: %+v", test.Output, frag) 119 | } 120 | }) 121 | } 122 | } 123 | 124 | func TestParseBinaryChunk(t *testing.T) { 125 | tests := map[string]struct { 126 | Input string 127 | Fragment BinaryFragment 128 | Output []byte 129 | Err string 130 | }{ 131 | "singleline": { 132 | Input: "TcmZQzU|?i`U?w2V48*Je09XJG\n\n", 133 | Fragment: BinaryFragment{ 134 | Size: 20, 135 | }, 136 | Output: fib(5, binary.BigEndian), 137 | }, 138 | 
"multiline": { 139 | Input: "zcmZQzU|?i`U?w2V48*KJ%mKu_Kr9NxNf->s?WfX|B-=Vs{#X~svra7Ekg#T|4s}nH;WnAZ)|1Y*`&cB\n" + 141 | "s(sh?X(Uz6L^!Ou&aF*u`J!eibJifSrv0z>$Q%Hd(^HIJ=3 valid lines 129 | if err := p.Next(); err != nil { 130 | return nil, err 131 | } 132 | if err := p.Next(); err != nil { 133 | return nil, err 134 | } 135 | 136 | oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0) 137 | if err != nil { 138 | return nil, p.Errorf(0, "file header: %v", err) 139 | } 140 | 141 | newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0) 142 | if err != nil { 143 | return nil, p.Errorf(1, "file header: %v", err) 144 | } 145 | 146 | f := &File{} 147 | switch { 148 | case oldName == devNull || hasEpochTimestamp(oldLine): 149 | f.IsNew = true 150 | f.NewName = newName 151 | case newName == devNull || hasEpochTimestamp(newLine): 152 | f.IsDelete = true 153 | f.OldName = oldName 154 | default: 155 | // if old name is a prefix of new name, use that instead 156 | // this avoids picking variants like "file.bak" or "file~" 157 | if strings.HasPrefix(newName, oldName) { 158 | f.OldName = oldName 159 | f.NewName = oldName 160 | } else { 161 | f.OldName = newName 162 | f.NewName = newName 163 | } 164 | } 165 | 166 | return f, nil 167 | } 168 | 169 | // parseGitHeaderName extracts a default file name from the Git file header 170 | // line. This is required for mode-only changes and creation/deletion of empty 171 | // files. Other types of patch include the file name(s) in the header data. 172 | // If the names in the header do not match because the patch is a rename, 173 | // return an empty default name. 
174 | func parseGitHeaderName(header string) (string, error) { 175 | header = strings.TrimSuffix(header, "\n") 176 | if len(header) == 0 { 177 | return "", nil 178 | } 179 | 180 | var err error 181 | var first, second string 182 | 183 | // there are 4 cases to account for: 184 | // 185 | // 1) unquoted unquoted 186 | // 2) unquoted "quoted" 187 | // 3) "quoted" unquoted 188 | // 4) "quoted" "quoted" 189 | // 190 | quote := strings.IndexByte(header, '"') 191 | switch { 192 | case quote < 0: 193 | // case 1 194 | first = header 195 | 196 | case quote > 0: 197 | // case 2 198 | first = header[:quote-1] 199 | if !isSpace(header[quote-1]) { 200 | return "", fmt.Errorf("missing separator") 201 | } 202 | 203 | second, _, err = parseQuotedName(header[quote:]) 204 | if err != nil { 205 | return "", err 206 | } 207 | 208 | case quote == 0: 209 | // case 3 or case 4 210 | var n int 211 | first, n, err = parseQuotedName(header) 212 | if err != nil { 213 | return "", err 214 | } 215 | 216 | // git accepts multiple spaces after a quoted name, but not after an 217 | // unquoted name, since the name might end with one or more spaces 218 | for n < len(header) && isSpace(header[n]) { 219 | n++ 220 | } 221 | if n == len(header) { 222 | return "", nil 223 | } 224 | 225 | if header[n] == '"' { 226 | second, _, err = parseQuotedName(header[n:]) 227 | if err != nil { 228 | return "", err 229 | } 230 | } else { 231 | second = header[n:] 232 | } 233 | } 234 | 235 | first = trimTreePrefix(first, 1) 236 | if second != "" { 237 | if first == trimTreePrefix(second, 1) { 238 | return first, nil 239 | } 240 | return "", nil 241 | } 242 | 243 | // at this point, both names are unquoted (case 1) 244 | // since names may contain spaces, we can't use a known separator 245 | // instead, look for a split that produces two equal names 246 | 247 | for i := 0; i < len(first)-1; i++ { 248 | if !isSpace(first[i]) { 249 | continue 250 | } 251 | second = trimTreePrefix(first[i+1:], 1) 252 | if name := 
first[:i]; name == second { 253 | return name, nil 254 | } 255 | } 256 | return "", nil 257 | } 258 | 259 | // parseGitHeaderData parses a single line of metadata from a Git file header. 260 | // It returns true when header parsing is complete; in that case, line was the 261 | // first line of non-header content. 262 | func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) { 263 | if len(line) > 0 && line[len(line)-1] == '\n' { 264 | line = line[:len(line)-1] 265 | } 266 | 267 | for _, hdr := range []struct { 268 | prefix string 269 | end bool 270 | parse func(*File, string, string) error 271 | }{ 272 | {"@@ -", true, nil}, 273 | {"--- ", false, parseGitHeaderOldName}, 274 | {"+++ ", false, parseGitHeaderNewName}, 275 | {"old mode ", false, parseGitHeaderOldMode}, 276 | {"new mode ", false, parseGitHeaderNewMode}, 277 | {"deleted file mode ", false, parseGitHeaderDeletedMode}, 278 | {"new file mode ", false, parseGitHeaderCreatedMode}, 279 | {"copy from ", false, parseGitHeaderCopyFrom}, 280 | {"copy to ", false, parseGitHeaderCopyTo}, 281 | {"rename old ", false, parseGitHeaderRenameFrom}, 282 | {"rename new ", false, parseGitHeaderRenameTo}, 283 | {"rename from ", false, parseGitHeaderRenameFrom}, 284 | {"rename to ", false, parseGitHeaderRenameTo}, 285 | {"similarity index ", false, parseGitHeaderScore}, 286 | {"dissimilarity index ", false, parseGitHeaderScore}, 287 | {"index ", false, parseGitHeaderIndex}, 288 | } { 289 | if strings.HasPrefix(line, hdr.prefix) { 290 | if hdr.parse != nil { 291 | err = hdr.parse(f, line[len(hdr.prefix):], defaultName) 292 | } 293 | return hdr.end, err 294 | } 295 | } 296 | 297 | // unknown line indicates the end of the header 298 | // this usually happens if the diff is empty 299 | return true, nil 300 | } 301 | 302 | func parseGitHeaderOldName(f *File, line, defaultName string) error { 303 | name, _, err := parseName(line, '\t', 1) 304 | if err != nil { 305 | return err 306 | } 307 | if f.OldName == 
"" && !f.IsNew { 308 | f.OldName = name 309 | return nil 310 | } 311 | return verifyGitHeaderName(name, f.OldName, f.IsNew, "old") 312 | } 313 | 314 | func parseGitHeaderNewName(f *File, line, defaultName string) error { 315 | name, _, err := parseName(line, '\t', 1) 316 | if err != nil { 317 | return err 318 | } 319 | if f.NewName == "" && !f.IsDelete { 320 | f.NewName = name 321 | return nil 322 | } 323 | return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new") 324 | } 325 | 326 | func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { 327 | f.OldMode, err = parseMode(strings.TrimSpace(line)) 328 | return 329 | } 330 | 331 | func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { 332 | f.NewMode, err = parseMode(strings.TrimSpace(line)) 333 | return 334 | } 335 | 336 | func parseGitHeaderDeletedMode(f *File, line, defaultName string) error { 337 | f.IsDelete = true 338 | f.OldName = defaultName 339 | return parseGitHeaderOldMode(f, line, defaultName) 340 | } 341 | 342 | func parseGitHeaderCreatedMode(f *File, line, defaultName string) error { 343 | f.IsNew = true 344 | f.NewName = defaultName 345 | return parseGitHeaderNewMode(f, line, defaultName) 346 | } 347 | 348 | func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { 349 | f.IsCopy = true 350 | f.OldName, _, err = parseName(line, 0, 0) 351 | return 352 | } 353 | 354 | func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { 355 | f.IsCopy = true 356 | f.NewName, _, err = parseName(line, 0, 0) 357 | return 358 | } 359 | 360 | func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { 361 | f.IsRename = true 362 | f.OldName, _, err = parseName(line, 0, 0) 363 | return 364 | } 365 | 366 | func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { 367 | f.IsRename = true 368 | f.NewName, _, err = parseName(line, 0, 0) 369 | return 370 | } 371 | 372 | func parseGitHeaderScore(f *File, line, 
defaultName string) error { 373 | score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32) 374 | if err != nil { 375 | nerr := err.(*strconv.NumError) 376 | return fmt.Errorf("invalid score line: %v", nerr.Err) 377 | } 378 | if score <= 100 { 379 | f.Score = int(score) 380 | } 381 | return nil 382 | } 383 | 384 | func parseGitHeaderIndex(f *File, line, defaultName string) error { 385 | const sep = ".." 386 | 387 | // note that git stops parsing if the OIDs are too long to be valid 388 | // checking this requires knowing if the repository uses SHA1 or SHA256 389 | // hashes, which we don't know, so we just skip that check 390 | 391 | parts := strings.SplitN(line, " ", 2) 392 | oids := strings.SplitN(parts[0], sep, 2) 393 | 394 | if len(oids) < 2 { 395 | return fmt.Errorf("invalid index line: missing %q", sep) 396 | } 397 | f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1] 398 | 399 | if len(parts) > 1 { 400 | return parseGitHeaderOldMode(f, parts[1], defaultName) 401 | } 402 | return nil 403 | } 404 | 405 | func parseMode(s string) (os.FileMode, error) { 406 | mode, err := strconv.ParseInt(s, 8, 32) 407 | if err != nil { 408 | nerr := err.(*strconv.NumError) 409 | return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err) 410 | } 411 | return os.FileMode(mode), nil 412 | } 413 | 414 | // parseName extracts a file name from the start of a string and returns the 415 | // name and the index of the first character after the name. If the name is 416 | // unquoted and term is non-zero, parsing stops at the first occurrence of 417 | // term. 418 | // 419 | // If the name is exactly "/dev/null", no further processing occurs. Otherwise, 420 | // if dropPrefix is greater than zero, that number of prefix components 421 | // separated by forward slashes are dropped from the name and any duplicate 422 | // slashes are collapsed. 
423 | func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { 424 | if len(s) > 0 && s[0] == '"' { 425 | name, n, err = parseQuotedName(s) 426 | } else { 427 | name, n, err = parseUnquotedName(s, term) 428 | } 429 | if err != nil { 430 | return "", 0, err 431 | } 432 | if name == devNull { 433 | return name, n, nil 434 | } 435 | return cleanName(name, dropPrefix), n, nil 436 | } 437 | 438 | func parseQuotedName(s string) (name string, n int, err error) { 439 | for n = 1; n < len(s); n++ { 440 | if s[n] == '"' && s[n-1] != '\\' { 441 | n++ 442 | break 443 | } 444 | } 445 | if n == 2 { 446 | return "", 0, fmt.Errorf("missing name") 447 | } 448 | if name, err = strconv.Unquote(s[:n]); err != nil { 449 | return "", 0, err 450 | } 451 | return name, n, err 452 | } 453 | 454 | func parseUnquotedName(s string, term byte) (name string, n int, err error) { 455 | for n = 0; n < len(s); n++ { 456 | if s[n] == '\n' { 457 | break 458 | } 459 | if term > 0 && s[n] == term { 460 | break 461 | } 462 | } 463 | if n == 0 { 464 | return "", 0, fmt.Errorf("missing name") 465 | } 466 | return s[:n], n, nil 467 | } 468 | 469 | // verifyGitHeaderName checks a parsed name against state set by previous lines 470 | func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error { 471 | if existing != "" { 472 | if isNull { 473 | return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing) 474 | } 475 | if existing != parsed { 476 | return fmt.Errorf("inconsistent %s filename", side) 477 | } 478 | } 479 | if isNull && parsed != devNull { 480 | return fmt.Errorf("expected %s", devNull) 481 | } 482 | return nil 483 | } 484 | 485 | // cleanName removes double slashes and drops prefix segments. 
// cleanName removes double slashes and drops prefix segments. Each of the
// first drop separators discards everything written so far, including the
// separator itself.
func cleanName(name string, drop int) string {
	var out strings.Builder
	last := len(name) - 1
	for i := 0; i <= last; i++ {
		c := name[i]
		if c != '/' {
			out.WriteByte(c)
			continue
		}
		if i < last && name[i+1] == '/' {
			// only the final slash of a run is significant
			continue
		}
		if drop > 0 {
			drop--
			out.Reset()
			continue
		}
		out.WriteByte(c)
	}
	return out.String()
}

// trimTreePrefix removes up to n leading directory components from name. If
// name has fewer than n slashes, the result is empty.
func trimTreePrefix(name string, n int) string {
	rest := name
	for ; n > 0; n-- {
		slash := strings.IndexByte(rest, '/')
		if slash < 0 {
			return ""
		}
		rest = rest[slash+1:]
	}
	return rest
}

// hasEpochTimestamp returns true if the string ends with a POSIX-formatted
// timestamp for the UNIX epoch after a tab character. According to git, this
// is used by GNU diff to mark creations and deletions.
func hasEpochTimestamp(s string) bool {
	const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"

	tab := strings.IndexByte(s, '\t')
	if tab < 0 {
		return false
	}
	stamp := strings.TrimSuffix(s[tab+1:], "\n")

	// a valid timestamp can have optional ':' in zone specifier
	// remove that if it exists so we have a single format
	if colon := len(stamp) - 3; colon >= 0 && stamp[colon] == ':' {
		stamp = stamp[:colon] + stamp[colon+1:]
	}

	parsed, err := time.Parse(posixTimeLayout, stamp)
	return err == nil && parsed.Equal(time.Unix(0, 0))
}

// isSpace reports whether c is a space, tab, or newline.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\n':
		return true
	}
	return false
}
// formatter writes patch text to an underlying writer. Its write methods
// never report failure; the first error from the underlying writer is kept in
// err and all later output is dropped.
type formatter struct {
	w   io.Writer
	err error
}

// newFormatter returns a formatter that writes to w.
func newFormatter(w io.Writer) *formatter {
	return &formatter{w: w}
}

// Write forwards p to the underlying writer unless an earlier write failed.
// It always returns len(p) and a nil error; failures are recorded in fm.err.
func (fm *formatter) Write(p []byte) (int, error) {
	if fm.err == nil {
		_, fm.err = fm.w.Write(p)
	}
	return len(p), nil
}

// WriteString writes s with the same error handling as Write.
func (fm *formatter) WriteString(s string) (int, error) {
	fm.Write([]byte(s))
	return len(s), nil
}

// WriteByte writes the single byte c with the same error handling as Write.
func (fm *formatter) WriteByte(c byte) error {
	fm.Write([]byte{c})
	return nil
}

// WriteQuotedName writes the name s, wrapping it in double quotes and
// escaping bytes only if at least one byte requires quoting.
func (fm *formatter) WriteQuotedName(s string) {
	// pending is the start of the text not yet written; it is non-zero only
	// after the opening quote has been emitted
	pending := 0
	for i := 0; i < len(s); i++ {
		escape, needsQuote := quoteByte(s[i])
		if !needsQuote {
			continue
		}
		if pending == 0 {
			fm.WriteByte('"')
		}
		fm.WriteString(s[pending:i])
		fm.Write(escape)
		pending = i + 1
	}
	fm.WriteString(s[pending:])
	if pending > 0 {
		fm.WriteByte('"')
	}
}

// quoteEscapeTable maps bytes that have a single-character backslash escape
// to the character that follows the backslash.
var quoteEscapeTable = map[byte]byte{
	'\a': 'a',
	'\b': 'b',
	'\t': 't',
	'\n': 'n',
	'\v': 'v',
	'\f': 'f',
	'\r': 'r',
	'"':  '"',
	'\\': '\\',
}

// quoteByte returns the escape sequence for b and true if b must be quoted.
// Bytes listed in quoteEscapeTable use their short escape; other bytes below
// 0x20 or at or above 0x7F use a three-digit octal escape.
func quoteByte(b byte) ([]byte, bool) {
	if short, ok := quoteEscapeTable[b]; ok {
		return []byte{'\\', short}, true
	}
	if b >= 0x20 && b < 0x7F {
		return nil, false
	}
	octal := []byte{
		'\\',
		'0' + ((b >> 6) & 0o3),
		'0' + ((b >> 3) & 0o7),
		'0' + (b & 0o7),
	}
	return octal, true
}
110 | if f.IsDelete { 111 | fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) 112 | } else if f.NewMode != 0 { 113 | fmt.Fprintf(fm, "old mode %o\n", f.OldMode) 114 | } 115 | } 116 | 117 | if f.NewMode != 0 { 118 | if f.IsNew { 119 | fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) 120 | } else if f.OldMode != 0 { 121 | fmt.Fprintf(fm, "new mode %o\n", f.NewMode) 122 | } 123 | } 124 | 125 | if f.Score > 0 { 126 | if f.IsCopy || f.IsRename { 127 | fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) 128 | } else { 129 | fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) 130 | } 131 | } 132 | 133 | if f.IsCopy { 134 | if f.OldName != "" { 135 | fm.WriteString("copy from ") 136 | fm.WriteQuotedName(f.OldName) 137 | fm.WriteByte('\n') 138 | } 139 | if f.NewName != "" { 140 | fm.WriteString("copy to ") 141 | fm.WriteQuotedName(f.NewName) 142 | fm.WriteByte('\n') 143 | } 144 | } 145 | 146 | if f.IsRename { 147 | if f.OldName != "" { 148 | fm.WriteString("rename from ") 149 | fm.WriteQuotedName(f.OldName) 150 | fm.WriteByte('\n') 151 | } 152 | if f.NewName != "" { 153 | fm.WriteString("rename to ") 154 | fm.WriteQuotedName(f.NewName) 155 | fm.WriteByte('\n') 156 | } 157 | } 158 | 159 | if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { 160 | fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) 161 | 162 | // Mode is only included on the index line when it is not changing 163 | if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { 164 | fmt.Fprintf(fm, " %o", f.OldMode) 165 | } 166 | 167 | fm.WriteByte('\n') 168 | } 169 | 170 | if f.IsBinary { 171 | if f.BinaryFragment == nil { 172 | fm.WriteString("Binary files ") 173 | fm.WriteQuotedName("a/" + aName) 174 | fm.WriteString(" and ") 175 | fm.WriteQuotedName("b/" + bName) 176 | fm.WriteString(" differ\n") 177 | } else { 178 | fm.WriteString("GIT binary patch\n") 179 | fm.FormatBinaryFragment(f.BinaryFragment) 180 | if f.ReverseBinaryFragment != nil { 181 | 
fm.FormatBinaryFragment(f.ReverseBinaryFragment) 182 | } 183 | } 184 | } 185 | 186 | // The "---" and "+++" lines only appear for text patches with fragments 187 | if len(f.TextFragments) > 0 { 188 | fm.WriteString("--- ") 189 | if f.OldName == "" { 190 | fm.WriteString("/dev/null") 191 | } else { 192 | fm.WriteQuotedName("a/" + f.OldName) 193 | } 194 | fm.WriteByte('\n') 195 | 196 | fm.WriteString("+++ ") 197 | if f.NewName == "" { 198 | fm.WriteString("/dev/null") 199 | } else { 200 | fm.WriteQuotedName("b/" + f.NewName) 201 | } 202 | fm.WriteByte('\n') 203 | 204 | for _, frag := range f.TextFragments { 205 | fm.FormatTextFragment(frag) 206 | } 207 | } 208 | } 209 | 210 | func (fm *formatter) FormatTextFragment(f *TextFragment) { 211 | fm.FormatTextFragmentHeader(f) 212 | fm.WriteByte('\n') 213 | 214 | for _, line := range f.Lines { 215 | fm.WriteString(line.Op.String()) 216 | fm.WriteString(line.Line) 217 | if line.NoEOL() { 218 | fm.WriteString("\n\\ No newline at end of file\n") 219 | } 220 | } 221 | } 222 | 223 | func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { 224 | fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) 225 | if f.Comment != "" { 226 | fm.WriteByte(' ') 227 | fm.WriteString(f.Comment) 228 | } 229 | } 230 | 231 | func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { 232 | const ( 233 | maxBytesPerLine = 52 234 | ) 235 | 236 | switch f.Method { 237 | case BinaryPatchDelta: 238 | fm.WriteString("delta ") 239 | case BinaryPatchLiteral: 240 | fm.WriteString("literal ") 241 | } 242 | fm.Write(strconv.AppendInt(nil, f.Size, 10)) 243 | fm.WriteByte('\n') 244 | 245 | data := deflateBinaryChunk(f.Data) 246 | n := (len(data) / maxBytesPerLine) * maxBytesPerLine 247 | 248 | buf := make([]byte, base85Len(maxBytesPerLine)) 249 | for i := 0; i < n; i += maxBytesPerLine { 250 | base85Encode(buf, data[i:i+maxBytesPerLine]) 251 | fm.WriteByte('z') 252 | fm.Write(buf) 253 | fm.WriteByte('\n') 254 | 
} 255 | if remainder := len(data) - n; remainder > 0 { 256 | buf = buf[0:base85Len(remainder)] 257 | 258 | sizeChar := byte(remainder) 259 | if remainder <= 26 { 260 | sizeChar = 'A' + sizeChar - 1 261 | } else { 262 | sizeChar = 'a' + sizeChar - 27 263 | } 264 | 265 | base85Encode(buf, data[n:]) 266 | fm.WriteByte(sizeChar) 267 | fm.Write(buf) 268 | fm.WriteByte('\n') 269 | } 270 | fm.WriteByte('\n') 271 | } 272 | 273 | func deflateBinaryChunk(data []byte) []byte { 274 | var b bytes.Buffer 275 | 276 | zw := zlib.NewWriter(&b) 277 | _, _ = zw.Write(data) 278 | _ = zw.Close() 279 | 280 | return b.Bytes() 281 | } 282 | -------------------------------------------------------------------------------- /gitdiff/format_roundtrip_test.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "slices" 9 | "testing" 10 | ) 11 | 12 | func TestFormatRoundtrip(t *testing.T) { 13 | patches := []struct { 14 | File string 15 | SkipTextCompare bool 16 | }{ 17 | {File: "copy.patch"}, 18 | {File: "copy_modify.patch"}, 19 | {File: "delete.patch"}, 20 | {File: "mode.patch"}, 21 | {File: "mode_modify.patch"}, 22 | {File: "modify.patch"}, 23 | {File: "new.patch"}, 24 | {File: "new_empty.patch"}, 25 | {File: "new_mode.patch"}, 26 | {File: "rename.patch"}, 27 | {File: "rename_modify.patch"}, 28 | 29 | // Due to differences between Go's 'encoding/zlib' package and the zlib 30 | // C library, binary patches cannot be compared directly as the patch 31 | // data is slightly different when re-encoded by Go. 
32 | {File: "binary_modify.patch", SkipTextCompare: true}, 33 | {File: "binary_new.patch", SkipTextCompare: true}, 34 | {File: "binary_modify_nodata.patch"}, 35 | } 36 | 37 | for _, patch := range patches { 38 | t.Run(patch.File, func(t *testing.T) { 39 | b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) 40 | if err != nil { 41 | t.Fatalf("failed to read patch: %v", err) 42 | } 43 | 44 | original := assertParseSingleFile(t, b, "patch") 45 | str := original.String() 46 | 47 | if !patch.SkipTextCompare { 48 | if string(b) != str { 49 | t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) 50 | } 51 | } 52 | 53 | reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") 54 | assertFilesEqual(t, original, reparsed) 55 | }) 56 | } 57 | } 58 | 59 | func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { 60 | files, _, err := Parse(bytes.NewReader(b)) 61 | if err != nil { 62 | t.Fatalf("failed to parse %s: %v", kind, err) 63 | } 64 | if len(files) != 1 { 65 | t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) 66 | } 67 | return files[0] 68 | } 69 | 70 | func assertFilesEqual(t *testing.T, expected, actual *File) { 71 | assertEqual(t, expected.OldName, actual.OldName, "OldName") 72 | assertEqual(t, expected.NewName, actual.NewName, "NewName") 73 | 74 | assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") 75 | assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") 76 | assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") 77 | assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") 78 | 79 | assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") 80 | assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") 81 | 82 | assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") 83 | assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") 84 | assertEqual(t, expected.Score, actual.Score, "Score") 85 | 86 | if 
len(expected.TextFragments) == len(actual.TextFragments) { 87 | for i := range expected.TextFragments { 88 | prefix := fmt.Sprintf("TextFragments[%d].", i) 89 | ef := expected.TextFragments[i] 90 | af := actual.TextFragments[i] 91 | 92 | assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") 93 | 94 | assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") 95 | assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") 96 | 97 | assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") 98 | assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") 99 | 100 | assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") 101 | assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") 102 | 103 | assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") 104 | assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") 105 | 106 | if !slices.Equal(ef.Lines, af.Lines) { 107 | t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) 108 | } 109 | } 110 | } else { 111 | t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), len(actual.TextFragments)) 112 | } 113 | 114 | assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") 115 | 116 | if expected.BinaryFragment != nil { 117 | if actual.BinaryFragment == nil { 118 | t.Errorf("BinaryFragment: expected non-nil, actual is nil") 119 | } else { 120 | ef := expected.BinaryFragment 121 | af := expected.BinaryFragment 122 | 123 | assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") 124 | assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") 125 | 126 | if !slices.Equal(ef.Data, af.Data) { 127 | t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) 128 | } 129 | } 130 | } else if actual.BinaryFragment != nil { 131 | t.Errorf("BinaryFragment: expected nil, actual is non-nil") 132 | } 133 | 134 | if expected.ReverseBinaryFragment != nil { 135 | if 
actual.ReverseBinaryFragment == nil { 136 | t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") 137 | } else { 138 | ef := expected.ReverseBinaryFragment 139 | af := expected.ReverseBinaryFragment 140 | 141 | assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") 142 | assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") 143 | 144 | if !slices.Equal(ef.Data, af.Data) { 145 | t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) 146 | } 147 | } 148 | } else if actual.ReverseBinaryFragment != nil { 149 | t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") 150 | } 151 | } 152 | 153 | func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { 154 | if expected != actual { 155 | t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /gitdiff/format_test.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestFormatter_WriteQuotedName(t *testing.T) { 9 | tests := []struct { 10 | Input string 11 | Expected string 12 | }{ 13 | {"noquotes.txt", `noquotes.txt`}, 14 | {"no quotes.txt", `no quotes.txt`}, 15 | {"new\nline", `"new\nline"`}, 16 | {"escape\x1B null\x00", `"escape\033 null\000"`}, 17 | {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, 18 | {"\"already quoted\"", `"\"already quoted\""`}, 19 | } 20 | 21 | for _, test := range tests { 22 | var b strings.Builder 23 | newFormatter(&b).WriteQuotedName(test.Input) 24 | if b.String() != test.Expected { 25 | t.Errorf("expected %q, got %q", test.Expected, b.String()) 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /gitdiff/gitdiff.go: -------------------------------------------------------------------------------- 1 | package 
gitdiff 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "strings" 8 | ) 9 | 10 | // File describes changes to a single file. It can be either a text file or a 11 | // binary file. 12 | type File struct { 13 | OldName string 14 | NewName string 15 | 16 | IsNew bool 17 | IsDelete bool 18 | IsCopy bool 19 | IsRename bool 20 | 21 | OldMode os.FileMode 22 | NewMode os.FileMode 23 | 24 | OldOIDPrefix string 25 | NewOIDPrefix string 26 | Score int 27 | 28 | // TextFragments contains the fragments describing changes to a text file. It 29 | // may be empty if the file is empty or if only the mode changes. 30 | TextFragments []*TextFragment 31 | 32 | // IsBinary is true if the file is a binary file. If the patch includes 33 | // binary data, BinaryFragment will be non-nil and describe the changes to 34 | // the data. If the patch is reversible, ReverseBinaryFragment will also be 35 | // non-nil and describe the changes needed to restore the original file 36 | // after applying the changes in BinaryFragment. 37 | IsBinary bool 38 | BinaryFragment *BinaryFragment 39 | ReverseBinaryFragment *BinaryFragment 40 | } 41 | 42 | // String returns a git diff representation of this file. The value can be 43 | // parsed by this library to obtain the same File, but may not be the same as 44 | // the original input. 45 | func (f *File) String() string { 46 | var diff strings.Builder 47 | newFormatter(&diff).FormatFile(f) 48 | return diff.String() 49 | } 50 | 51 | // TextFragment describes changed lines starting at a specific line in a text file. 52 | type TextFragment struct { 53 | Comment string 54 | 55 | OldPosition int64 56 | OldLines int64 57 | 58 | NewPosition int64 59 | NewLines int64 60 | 61 | LinesAdded int64 62 | LinesDeleted int64 63 | 64 | LeadingContext int64 65 | TrailingContext int64 66 | 67 | Lines []Line 68 | } 69 | 70 | // String returns a git diff format of this fragment. See [File.String] for 71 | // more details on this format. 
72 | func (f *TextFragment) String() string { 73 | var diff strings.Builder 74 | newFormatter(&diff).FormatTextFragment(f) 75 | return diff.String() 76 | } 77 | 78 | // Header returns a git diff header of this fragment. See [File.String] for 79 | // more details on this format. 80 | func (f *TextFragment) Header() string { 81 | var hdr strings.Builder 82 | newFormatter(&hdr).FormatTextFragmentHeader(f) 83 | return hdr.String() 84 | } 85 | 86 | // Validate checks that the fragment is self-consistent and appliable. Validate 87 | // returns an error if and only if the fragment is invalid. 88 | func (f *TextFragment) Validate() error { 89 | if f == nil { 90 | return errors.New("nil fragment") 91 | } 92 | 93 | var ( 94 | oldLines, newLines int64 95 | leadingContext, trailingContext int64 96 | contextLines, addedLines, deletedLines int64 97 | ) 98 | 99 | // count the types of lines in the fragment content 100 | for i, line := range f.Lines { 101 | switch line.Op { 102 | case OpContext: 103 | oldLines++ 104 | newLines++ 105 | contextLines++ 106 | if addedLines == 0 && deletedLines == 0 { 107 | leadingContext++ 108 | } else { 109 | trailingContext++ 110 | } 111 | case OpAdd: 112 | newLines++ 113 | addedLines++ 114 | trailingContext = 0 115 | case OpDelete: 116 | oldLines++ 117 | deletedLines++ 118 | trailingContext = 0 119 | default: 120 | return fmt.Errorf("unknown operator %q on line %d", line.Op, i+1) 121 | } 122 | } 123 | 124 | // check the actual counts against the reported counts 125 | if oldLines != f.OldLines { 126 | return lineCountErr("old", oldLines, f.OldLines) 127 | } 128 | if newLines != f.NewLines { 129 | return lineCountErr("new", newLines, f.NewLines) 130 | } 131 | if leadingContext != f.LeadingContext { 132 | return lineCountErr("leading context", leadingContext, f.LeadingContext) 133 | } 134 | if trailingContext != f.TrailingContext { 135 | return lineCountErr("trailing context", trailingContext, f.TrailingContext) 136 | } 137 | if addedLines != 
f.LinesAdded { 138 | return lineCountErr("added", addedLines, f.LinesAdded) 139 | } 140 | if deletedLines != f.LinesDeleted { 141 | return lineCountErr("deleted", deletedLines, f.LinesDeleted) 142 | } 143 | 144 | // if a file is being created, it can only contain additions 145 | if f.OldPosition == 0 && f.OldLines != 0 { 146 | return errors.New("file creation fragment contains context or deletion lines") 147 | } 148 | 149 | return nil 150 | } 151 | 152 | func lineCountErr(kind string, actual, reported int64) error { 153 | return fmt.Errorf("fragment contains %d %s lines but reports %d", actual, kind, reported) 154 | } 155 | 156 | // Line is a line in a text fragment. 157 | type Line struct { 158 | Op LineOp 159 | Line string 160 | } 161 | 162 | func (fl Line) String() string { 163 | return fl.Op.String() + fl.Line 164 | } 165 | 166 | // Old returns true if the line appears in the old content of the fragment. 167 | func (fl Line) Old() bool { 168 | return fl.Op == OpContext || fl.Op == OpDelete 169 | } 170 | 171 | // New returns true if the line appears in the new content of the fragment. 172 | func (fl Line) New() bool { 173 | return fl.Op == OpContext || fl.Op == OpAdd 174 | } 175 | 176 | // NoEOL returns true if the line is missing a trailing newline character. 177 | func (fl Line) NoEOL() bool { 178 | return len(fl.Line) == 0 || fl.Line[len(fl.Line)-1] != '\n' 179 | } 180 | 181 | // LineOp describes the type of a text fragment line: context, added, or removed. 182 | type LineOp int 183 | 184 | const ( 185 | // OpContext indicates a context line 186 | OpContext LineOp = iota 187 | // OpDelete indicates a deleted line 188 | OpDelete 189 | // OpAdd indicates an added line 190 | OpAdd 191 | ) 192 | 193 | func (op LineOp) String() string { 194 | switch op { 195 | case OpContext: 196 | return " " 197 | case OpDelete: 198 | return "-" 199 | case OpAdd: 200 | return "+" 201 | } 202 | return "?" 203 | } 204 | 205 | // BinaryFragment describes changes to a binary file. 
206 | type BinaryFragment struct { 207 | Method BinaryPatchMethod 208 | Size int64 209 | Data []byte 210 | } 211 | 212 | // BinaryPatchMethod is the method used to create and apply the binary patch. 213 | type BinaryPatchMethod int 214 | 215 | const ( 216 | // BinaryPatchDelta indicates the data uses Git's packfile encoding 217 | BinaryPatchDelta BinaryPatchMethod = iota 218 | // BinaryPatchLiteral indicates the data is the exact file content 219 | BinaryPatchLiteral 220 | ) 221 | 222 | // String returns a git diff format of this fragment. Due to differences in 223 | // zlib implementation between Go and Git, encoded binary data in the result 224 | // will likely differ from what Git produces for the same input. See 225 | // [File.String] for more details on this format. 226 | func (f *BinaryFragment) String() string { 227 | var diff strings.Builder 228 | newFormatter(&diff).FormatBinaryFragment(f) 229 | return diff.String() 230 | } 231 | -------------------------------------------------------------------------------- /gitdiff/gitdiff_test.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestTextFragmentValidate(t *testing.T) { 9 | tests := map[string]struct { 10 | Fragment TextFragment 11 | Err string 12 | }{ 13 | "oldLines": { 14 | Fragment: TextFragment{ 15 | OldPosition: 1, 16 | OldLines: 3, 17 | NewPosition: 1, 18 | NewLines: 2, 19 | LeadingContext: 1, 20 | TrailingContext: 0, 21 | LinesAdded: 1, 22 | LinesDeleted: 1, 23 | Lines: []Line{ 24 | {Op: OpContext, Line: "line 1\n"}, 25 | {Op: OpDelete, Line: "old line 2\n"}, 26 | {Op: OpAdd, Line: "new line 2\n"}, 27 | }, 28 | }, 29 | Err: "2 old lines", 30 | }, 31 | "newLines": { 32 | Fragment: TextFragment{ 33 | OldPosition: 1, 34 | OldLines: 2, 35 | NewPosition: 1, 36 | NewLines: 3, 37 | LeadingContext: 1, 38 | TrailingContext: 0, 39 | LinesAdded: 1, 40 | LinesDeleted: 1, 41 | Lines: []Line{ 42 
| {Op: OpContext, Line: "line 1\n"}, 43 | {Op: OpDelete, Line: "old line 2\n"}, 44 | {Op: OpAdd, Line: "new line 2\n"}, 45 | }, 46 | }, 47 | Err: "2 new lines", 48 | }, 49 | "leadingContext": { 50 | Fragment: TextFragment{ 51 | OldPosition: 1, 52 | OldLines: 2, 53 | NewPosition: 1, 54 | NewLines: 2, 55 | LeadingContext: 0, 56 | TrailingContext: 0, 57 | LinesAdded: 1, 58 | LinesDeleted: 1, 59 | Lines: []Line{ 60 | {Op: OpContext, Line: "line 1\n"}, 61 | {Op: OpDelete, Line: "old line 2\n"}, 62 | {Op: OpAdd, Line: "new line 2\n"}, 63 | }, 64 | }, 65 | Err: "1 leading context lines", 66 | }, 67 | "trailingContext": { 68 | Fragment: TextFragment{ 69 | OldPosition: 1, 70 | OldLines: 4, 71 | NewPosition: 1, 72 | NewLines: 3, 73 | LeadingContext: 1, 74 | TrailingContext: 1, 75 | LinesAdded: 1, 76 | LinesDeleted: 2, 77 | Lines: []Line{ 78 | {Op: OpContext, Line: "line 1\n"}, 79 | {Op: OpDelete, Line: "old line 2\n"}, 80 | {Op: OpAdd, Line: "new line 2\n"}, 81 | {Op: OpContext, Line: "line 3\n"}, 82 | {Op: OpDelete, Line: "old line 4\n"}, 83 | }, 84 | }, 85 | Err: "0 trailing context lines", 86 | }, 87 | "linesAdded": { 88 | Fragment: TextFragment{ 89 | OldPosition: 1, 90 | OldLines: 4, 91 | NewPosition: 1, 92 | NewLines: 3, 93 | LeadingContext: 1, 94 | TrailingContext: 0, 95 | LinesAdded: 2, 96 | LinesDeleted: 2, 97 | Lines: []Line{ 98 | {Op: OpContext, Line: "line 1\n"}, 99 | {Op: OpDelete, Line: "old line 2\n"}, 100 | {Op: OpAdd, Line: "new line 2\n"}, 101 | {Op: OpContext, Line: "line 3\n"}, 102 | {Op: OpDelete, Line: "old line 4\n"}, 103 | }, 104 | }, 105 | Err: "1 added lines", 106 | }, 107 | "linesDeleted": { 108 | Fragment: TextFragment{ 109 | OldPosition: 1, 110 | OldLines: 4, 111 | NewPosition: 1, 112 | NewLines: 3, 113 | LeadingContext: 1, 114 | TrailingContext: 0, 115 | LinesAdded: 1, 116 | LinesDeleted: 1, 117 | Lines: []Line{ 118 | {Op: OpContext, Line: "line 1\n"}, 119 | {Op: OpDelete, Line: "old line 2\n"}, 120 | {Op: OpAdd, Line: "new line 2\n"}, 121 | {Op: 
OpContext, Line: "line 3\n"}, 122 | {Op: OpDelete, Line: "old line 4\n"}, 123 | }, 124 | }, 125 | Err: "2 deleted lines", 126 | }, 127 | "fileCreation": { 128 | Fragment: TextFragment{ 129 | OldPosition: 0, 130 | OldLines: 2, 131 | NewPosition: 1, 132 | NewLines: 1, 133 | LeadingContext: 0, 134 | TrailingContext: 0, 135 | LinesAdded: 1, 136 | LinesDeleted: 2, 137 | Lines: []Line{ 138 | {Op: OpDelete, Line: "old line 1\n"}, 139 | {Op: OpDelete, Line: "old line 2\n"}, 140 | {Op: OpAdd, Line: "new line\n"}, 141 | }, 142 | }, 143 | Err: "creation fragment", 144 | }, 145 | } 146 | 147 | for name, test := range tests { 148 | t.Run(name, func(t *testing.T) { 149 | err := test.Fragment.Validate() 150 | if test.Err == "" && err != nil { 151 | t.Fatalf("unexpected validation error: %v", err) 152 | } 153 | if test.Err != "" && err == nil { 154 | t.Fatal("expected validation error, but got nil") 155 | } 156 | if !strings.Contains(err.Error(), test.Err) { 157 | t.Fatalf("incorrect validation error: %q is not in %q", test.Err, err.Error()) 158 | } 159 | }) 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /gitdiff/io.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | ) 7 | 8 | const ( 9 | byteBufferSize = 32 * 1024 // from io.Copy 10 | lineBufferSize = 32 11 | indexBufferSize = 1024 12 | ) 13 | 14 | // LineReaderAt is the interface that wraps the ReadLinesAt method. 15 | // 16 | // ReadLinesAt reads len(lines) into lines starting at line offset. It returns 17 | // the number of lines read (0 <= n <= len(lines)) and any error encountered. 18 | // Line numbers are zero-indexed. 19 | // 20 | // If n < len(lines), ReadLinesAt returns a non-nil error explaining why more 21 | // lines were not returned. 22 | // 23 | // Lines read by ReadLinesAt include the newline character. 
The last line does 24 | // not have a final newline character if the input ends without one. 25 | type LineReaderAt interface { 26 | ReadLinesAt(lines [][]byte, offset int64) (n int, err error) 27 | } 28 | 29 | type lineReaderAt struct { 30 | r io.ReaderAt 31 | index []int64 32 | eof bool 33 | } 34 | 35 | func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err error) { 36 | if offset < 0 { 37 | return 0, errors.New("ReadLinesAt: negative offset") 38 | } 39 | if len(lines) == 0 { 40 | return 0, nil 41 | } 42 | 43 | count := len(lines) 44 | startLine := offset 45 | endLine := startLine + int64(count) 46 | 47 | if endLine > int64(len(r.index)) && !r.eof { 48 | if err := r.indexTo(endLine); err != nil { 49 | return 0, err 50 | } 51 | } 52 | if startLine >= int64(len(r.index)) { 53 | return 0, io.EOF 54 | } 55 | 56 | buf, byteOffset, err := r.readBytes(startLine, int64(count)) 57 | if err != nil { 58 | return 0, err 59 | } 60 | 61 | for n = 0; n < count && startLine+int64(n) < int64(len(r.index)); n++ { 62 | lineno := startLine + int64(n) 63 | start, end := int64(0), r.index[lineno]-byteOffset 64 | if lineno > 0 { 65 | start = r.index[lineno-1] - byteOffset 66 | } 67 | lines[n] = buf[start:end] 68 | } 69 | 70 | if n < count { 71 | return n, io.EOF 72 | } 73 | return n, nil 74 | } 75 | 76 | // indexTo reads data and computes the line index until there is information 77 | // for line or a read returns io.EOF. It returns an error if and only if there 78 | // is an error reading data. 
79 | func (r *lineReaderAt) indexTo(line int64) error { 80 | var buf [indexBufferSize]byte 81 | 82 | offset := r.lastOffset() 83 | for int64(len(r.index)) < line { 84 | n, err := r.r.ReadAt(buf[:], offset) 85 | if err != nil && err != io.EOF { 86 | return err 87 | } 88 | for _, b := range buf[:n] { 89 | offset++ 90 | if b == '\n' { 91 | r.index = append(r.index, offset) 92 | } 93 | } 94 | if err == io.EOF { 95 | if offset > r.lastOffset() { 96 | r.index = append(r.index, offset) 97 | } 98 | r.eof = true 99 | break 100 | } 101 | } 102 | return nil 103 | } 104 | 105 | func (r *lineReaderAt) lastOffset() int64 { 106 | if n := len(r.index); n > 0 { 107 | return r.index[n-1] 108 | } 109 | return 0 110 | } 111 | 112 | // readBytes reads the bytes of the n lines starting at line and returns the 113 | // bytes and the offset of the first byte in the underlying source. 114 | func (r *lineReaderAt) readBytes(line, n int64) (b []byte, offset int64, err error) { 115 | indexLen := int64(len(r.index)) 116 | 117 | var size int64 118 | if line > indexLen { 119 | offset = r.index[indexLen-1] 120 | } else if line > 0 { 121 | offset = r.index[line-1] 122 | } 123 | if n > 0 { 124 | if line+n > indexLen { 125 | size = r.index[indexLen-1] - offset 126 | } else { 127 | size = r.index[line+n-1] - offset 128 | } 129 | } 130 | 131 | b = make([]byte, size) 132 | if _, err := r.r.ReadAt(b, offset); err != nil { 133 | if err == io.EOF { 134 | err = errors.New("ReadLinesAt: corrupt line index or changed source data") 135 | } 136 | return nil, 0, err 137 | } 138 | return b, offset, nil 139 | } 140 | 141 | func isLen(r io.ReaderAt, n int64) (bool, error) { 142 | off := n - 1 143 | if off < 0 { 144 | off = 0 145 | } 146 | 147 | var b [2]byte 148 | nr, err := r.ReadAt(b[:], off) 149 | if err == io.EOF { 150 | return (n == 0 && nr == 0) || (n > 0 && nr == 1), nil 151 | } 152 | return false, err 153 | } 154 | 155 | // copyFrom writes bytes starting from offset off in src to dst stopping at the 156 | 
// end of src or at the first error. copyFrom returns the number of bytes 157 | // written and any error. 158 | func copyFrom(dst io.Writer, src io.ReaderAt, off int64) (written int64, err error) { 159 | buf := make([]byte, byteBufferSize) 160 | for { 161 | nr, rerr := src.ReadAt(buf, off) 162 | if nr > 0 { 163 | nw, werr := dst.Write(buf[0:nr]) 164 | if nw > 0 { 165 | written += int64(nw) 166 | } 167 | if werr != nil { 168 | err = werr 169 | break 170 | } 171 | if nr != nw { 172 | err = io.ErrShortWrite 173 | break 174 | } 175 | off += int64(nr) 176 | } 177 | if rerr != nil { 178 | if rerr != io.EOF { 179 | err = rerr 180 | } 181 | break 182 | } 183 | } 184 | return written, err 185 | } 186 | 187 | // copyLinesFrom writes lines starting from line off in src to dst stopping at 188 | // the end of src or at the first error. copyLinesFrom returns the number of 189 | // lines written and any error. 190 | func copyLinesFrom(dst io.Writer, src LineReaderAt, off int64) (written int64, err error) { 191 | buf := make([][]byte, lineBufferSize) 192 | ReadLoop: 193 | for { 194 | nr, rerr := src.ReadLinesAt(buf, off) 195 | if nr > 0 { 196 | for _, line := range buf[0:nr] { 197 | nw, werr := dst.Write(line) 198 | if nw > 0 { 199 | written++ 200 | } 201 | if werr != nil { 202 | err = werr 203 | break ReadLoop 204 | } 205 | if len(line) != nw { 206 | err = io.ErrShortWrite 207 | break ReadLoop 208 | } 209 | } 210 | off += int64(nr) 211 | } 212 | if rerr != nil { 213 | if rerr != io.EOF { 214 | err = rerr 215 | } 216 | break 217 | } 218 | } 219 | return written, err 220 | } 221 | -------------------------------------------------------------------------------- /gitdiff/io_test.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "math/rand" 8 | "testing" 9 | ) 10 | 11 | func TestLineReaderAt(t *testing.T) { 12 | const lineTemplate = "generated test line %d\n" 13 | 14 | tests := 
map[string]struct { 15 | InputLines int 16 | Offset int64 17 | Count int 18 | Err bool 19 | EOF bool 20 | EOFCount int 21 | }{ 22 | "readLines": { 23 | InputLines: 32, 24 | Offset: 0, 25 | Count: 4, 26 | }, 27 | "readLinesOffset": { 28 | InputLines: 32, 29 | Offset: 8, 30 | Count: 4, 31 | }, 32 | "readLinesLargeOffset": { 33 | InputLines: 8192, 34 | Offset: 4096, 35 | Count: 64, 36 | }, 37 | "readSingleLine": { 38 | InputLines: 4, 39 | Offset: 2, 40 | Count: 1, 41 | }, 42 | "readZeroLines": { 43 | InputLines: 4, 44 | Offset: 2, 45 | Count: 0, 46 | }, 47 | "readAllLines": { 48 | InputLines: 64, 49 | Offset: 0, 50 | Count: 64, 51 | }, 52 | "readThroughEOF": { 53 | InputLines: 16, 54 | Offset: 12, 55 | Count: 8, 56 | EOF: true, 57 | EOFCount: 4, 58 | }, 59 | "emptyInput": { 60 | InputLines: 0, 61 | Offset: 0, 62 | Count: 2, 63 | EOF: true, 64 | EOFCount: 0, 65 | }, 66 | "offsetAfterEOF": { 67 | InputLines: 8, 68 | Offset: 10, 69 | Count: 2, 70 | EOF: true, 71 | EOFCount: 0, 72 | }, 73 | "offsetNegative": { 74 | InputLines: 8, 75 | Offset: -1, 76 | Count: 2, 77 | Err: true, 78 | }, 79 | } 80 | 81 | for name, test := range tests { 82 | t.Run(name, func(t *testing.T) { 83 | var input bytes.Buffer 84 | for i := 0; i < test.InputLines; i++ { 85 | fmt.Fprintf(&input, lineTemplate, i) 86 | } 87 | 88 | output := make([][]byte, test.Count) 89 | for i := 0; i < test.Count; i++ { 90 | output[i] = []byte(fmt.Sprintf(lineTemplate, test.Offset+int64(i))) 91 | } 92 | 93 | r := &lineReaderAt{r: bytes.NewReader(input.Bytes())} 94 | lines := make([][]byte, test.Count) 95 | 96 | n, err := r.ReadLinesAt(lines, test.Offset) 97 | if test.Err { 98 | if err == nil { 99 | t.Fatal("expected error reading lines, but got nil") 100 | } 101 | return 102 | } 103 | if err != nil && (!test.EOF || err != io.EOF) { 104 | t.Fatalf("unexpected error reading lines: %v", err) 105 | } 106 | 107 | count := test.Count 108 | if test.EOF { 109 | count = test.EOFCount 110 | } 111 | 112 | if n != count { 113 | 
t.Fatalf("incorrect number of lines read: expected %d, actual %d", count, n) 114 | } 115 | for i := 0; i < n; i++ { 116 | if !bytes.Equal(output[i], lines[i]) { 117 | t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], lines[i]) 118 | } 119 | } 120 | }) 121 | } 122 | 123 | newlineTests := map[string]struct { 124 | InputSize int 125 | }{ 126 | "readLinesNoFinalNewline": { 127 | InputSize: indexBufferSize + indexBufferSize/2, 128 | }, 129 | "readLinesNoFinalNewlineBufferMultiple": { 130 | InputSize: 4 * indexBufferSize, 131 | }, 132 | } 133 | 134 | for name, test := range newlineTests { 135 | t.Run(name, func(t *testing.T) { 136 | input := bytes.Repeat([]byte("0"), test.InputSize) 137 | 138 | var output [][]byte 139 | for i := 0; i < len(input); i++ { 140 | last := i 141 | i += rand.Intn(80) 142 | if i < len(input)-1 { // last character of input must not be a newline 143 | input[i] = '\n' 144 | output = append(output, input[last:i+1]) 145 | } else { 146 | output = append(output, input[last:]) 147 | } 148 | } 149 | 150 | r := &lineReaderAt{r: bytes.NewReader(input)} 151 | lines := make([][]byte, len(output)) 152 | 153 | n, err := r.ReadLinesAt(lines, 0) 154 | if err != nil { 155 | t.Fatalf("unexpected error reading reading lines: %v", err) 156 | } 157 | 158 | if n != len(output) { 159 | t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n) 160 | } 161 | 162 | for i, line := range lines { 163 | if !bytes.Equal(output[i], line) { 164 | t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line) 165 | } 166 | } 167 | }) 168 | } 169 | } 170 | 171 | func TestCopyFrom(t *testing.T) { 172 | tests := map[string]struct { 173 | Bytes int64 174 | Offset int64 175 | }{ 176 | "copyAll": { 177 | Bytes: byteBufferSize / 2, 178 | }, 179 | "copyPartial": { 180 | Bytes: byteBufferSize / 2, 181 | Offset: byteBufferSize / 4, 182 | }, 183 | "copyLarge": { 184 | Bytes: 8 * byteBufferSize, 185 | }, 186 | 
} 187 | 188 | for name, test := range tests { 189 | t.Run(name, func(t *testing.T) { 190 | data := make([]byte, test.Bytes) 191 | rand.Read(data) 192 | 193 | var dst bytes.Buffer 194 | n, err := copyFrom(&dst, bytes.NewReader(data), test.Offset) 195 | if err != nil { 196 | t.Fatalf("unexpected error copying data: %v", err) 197 | } 198 | if n != test.Bytes-test.Offset { 199 | t.Fatalf("incorrect number of bytes copied: expected %d, actual %d", test.Bytes-test.Offset, n) 200 | } 201 | 202 | expected := data[test.Offset:] 203 | if !bytes.Equal(expected, dst.Bytes()) { 204 | t.Fatalf("incorrect data copied:\nexpected: %v\nactual: %v", expected, dst.Bytes()) 205 | } 206 | }) 207 | } 208 | } 209 | 210 | func TestCopyLinesFrom(t *testing.T) { 211 | tests := map[string]struct { 212 | Lines int64 213 | Offset int64 214 | }{ 215 | "copyAll": { 216 | Lines: lineBufferSize / 2, 217 | }, 218 | "copyPartial": { 219 | Lines: lineBufferSize / 2, 220 | Offset: lineBufferSize / 4, 221 | }, 222 | "copyLarge": { 223 | Lines: 8 * lineBufferSize, 224 | }, 225 | } 226 | 227 | const lineLength = 128 228 | 229 | for name, test := range tests { 230 | t.Run(name, func(t *testing.T) { 231 | data := make([]byte, test.Lines*lineLength) 232 | for i := range data { 233 | data[i] = byte(32 + rand.Intn(95)) // ascii letters, numbers, symbols 234 | if i%lineLength == lineLength-1 { 235 | data[i] = '\n' 236 | } 237 | } 238 | 239 | var dst bytes.Buffer 240 | n, err := copyLinesFrom(&dst, &lineReaderAt{r: bytes.NewReader(data)}, test.Offset) 241 | if err != nil { 242 | t.Fatalf("unexpected error copying data: %v", err) 243 | } 244 | if n != test.Lines-test.Offset { 245 | t.Fatalf("incorrect number of lines copied: expected %d, actual %d", test.Lines-test.Offset, n) 246 | } 247 | 248 | expected := data[test.Offset*lineLength:] 249 | if !bytes.Equal(expected, dst.Bytes()) { 250 | t.Fatalf("incorrect data copied:\nexpected: %v\nactual: %v", expected, dst.Bytes()) 251 | } 252 | }) 253 | } 254 | } 255 | 
-------------------------------------------------------------------------------- /gitdiff/parser.go: -------------------------------------------------------------------------------- 1 | // Package gitdiff parses and applies patches generated by Git. It supports 2 | // line-oriented text patches, binary patches, and can also parse standard 3 | // unified diffs generated by other tools. 4 | package gitdiff 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "io" 10 | ) 11 | 12 | // Parse parses a patch with changes to one or more files. Any content before 13 | // the first file is returned as the second value. If an error occurs while 14 | // parsing, it returns all files parsed before the error. 15 | // 16 | // Parse expects to receive a single patch. If the input may contain multiple 17 | // patches (for example, if it is an mbox file), callers should split it into 18 | // individual patches and call Parse on each one. 19 | func Parse(r io.Reader) ([]*File, string, error) { 20 | p := newParser(r) 21 | 22 | if err := p.Next(); err != nil { 23 | if err == io.EOF { 24 | return nil, "", nil 25 | } 26 | return nil, "", err 27 | } 28 | 29 | var preamble string 30 | var files []*File 31 | for { 32 | file, pre, err := p.ParseNextFileHeader() 33 | if err != nil { 34 | return files, preamble, err 35 | } 36 | if len(files) == 0 { 37 | preamble = pre 38 | } 39 | if file == nil { 40 | break 41 | } 42 | 43 | for _, fn := range []func(*File) (int, error){ 44 | p.ParseTextFragments, 45 | p.ParseBinaryFragments, 46 | } { 47 | n, err := fn(file) 48 | if err != nil { 49 | return files, preamble, err 50 | } 51 | if n > 0 { 52 | break 53 | } 54 | } 55 | 56 | files = append(files, file) 57 | } 58 | 59 | return files, preamble, nil 60 | } 61 | 62 | // TODO(bkeyes): consider exporting the parser type with configuration 63 | // this would enable OID validation, p-value guessing, and prefix stripping 64 | // by allowing users to set or override defaults 65 | 66 | // parser invariants: 67 | // - 
// stringReader is the subset of bufio.Reader that the parser needs to read
// its input one line at a time.
type stringReader interface {
	ReadString(delim byte) (string, error)
}

// parser reads a patch line by line. It holds the current line and a fixed
// number of read-ahead lines in lines, where lines[0] is the current line.
type parser struct {
	r stringReader

	eof    bool
	lineno int64
	lines  [3]string
}

// newParser returns a parser that reads from r. If r already provides
// ReadString it is used directly; otherwise it is wrapped in a bufio.Reader.
func newParser(r io.Reader) *parser {
	if r, ok := r.(stringReader); ok {
		return &parser{r: r}
	}
	return &parser{r: bufio.NewReader(r)}
}

// Next advances the parser by one line. It returns any error encountered while
// reading the line, including io.EOF when the end of stream is reached. Once
// io.EOF has been returned, further calls return io.EOF without advancing.
func (p *parser) Next() error {
	if p.eof {
		return io.EOF
	}

	if p.lineno == 0 {
		// on first call to next, need to shift in all lines
		for i := 0; i < len(p.lines)-1; i++ {
			if err := p.shiftLines(); err != nil && err != io.EOF {
				return err
			}
		}
	}

	err := p.shiftLines()
	if err != nil && err != io.EOF {
		return err
	}

	p.lineno++
	// An empty current line means the input is exhausted: every line that
	// was actually read holds at least one character.
	if p.lines[0] == "" {
		p.eof = true
		return io.EOF
	}
	return nil
}

// shiftLines drops the current line, moves every buffered line one slot
// forward, and reads the next input line into the last slot. The read error,
// if any, is returned after the slot has been assigned.
func (p *parser) shiftLines() (err error) {
	for i := 0; i < len(p.lines)-1; i++ {
		p.lines[i] = p.lines[i+1]
	}
	p.lines[len(p.lines)-1], err = p.r.ReadString('\n')
	return
}

// Line returns a line from the parser without advancing it. A delta of 0
// returns the current line, while higher deltas return read-ahead lines. It
// returns an empty string if the delta is higher than the available lines,
// either because of the buffer size or because the parser reached the end of
// the input. Valid lines always contain at least a newline character.
func (p *parser) Line(delta uint) string {
	// Guard the fixed-size buffer so an oversized delta yields the
	// documented empty string instead of an index-out-of-range panic.
	if delta >= uint(len(p.lines)) {
		return ""
	}
	return p.lines[delta]
}

// Errorf generates an error and appends the current line information. The
// reported line number is the current line number plus delta.
func (p *parser) Errorf(delta int64, msg string, args ...interface{}) error {
	return fmt.Errorf("gitdiff: line %d: %s", p.lineno+delta, fmt.Sprintf(msg, args...))
}
| if err := p.Next(); err != io.EOF { 47 | t.Fatalf("expected EOF after end, but got: %v", err) 48 | } 49 | if p.lineno != 4 { 50 | t.Fatalf("incorrect line number: expected %d, actual: %d", 4, p.lineno) 51 | } 52 | }) 53 | 54 | t.Run("peek", func(t *testing.T) { 55 | p := newTestParser(content, false) 56 | if err := p.Next(); err != nil { 57 | t.Fatalf("error advancing parser: %v", err) 58 | } 59 | 60 | line := p.Line(1) 61 | if line != "the second line\n" { 62 | t.Fatalf("incorrect peek line: %s", line) 63 | } 64 | 65 | if err := p.Next(); err != nil { 66 | t.Fatalf("error advancing parser after peek: %v", err) 67 | } 68 | 69 | line = p.Line(0) 70 | if line != "the second line\n" { 71 | t.Fatalf("incorrect read line: %s", line) 72 | } 73 | }) 74 | 75 | t.Run("emptyInput", func(t *testing.T) { 76 | p := newTestParser("", false) 77 | if err := p.Next(); err != io.EOF { 78 | t.Fatalf("expected EOF on first Next(), but got: %v", err) 79 | } 80 | }) 81 | } 82 | 83 | func TestParserInvariant_Advancement(t *testing.T) { 84 | tests := map[string]struct { 85 | Input string 86 | Parse func(p *parser) error 87 | EndLine string 88 | }{ 89 | "ParseGitFileHeader": { 90 | Input: `diff --git a/dir/file.txt b/dir/file.txt 91 | index 9540595..30e6333 100644 92 | --- a/dir/file.txt 93 | +++ b/dir/file.txt 94 | @@ -1,2 +1,3 @@ 95 | context line 96 | `, 97 | Parse: func(p *parser) error { 98 | _, err := p.ParseGitFileHeader() 99 | return err 100 | }, 101 | EndLine: "@@ -1,2 +1,3 @@\n", 102 | }, 103 | "ParseTraditionalFileHeader": { 104 | Input: `--- dir/file.txt 105 | +++ dir/file.txt 106 | @@ -1,2 +1,3 @@ 107 | context line 108 | `, 109 | Parse: func(p *parser) error { 110 | _, err := p.ParseTraditionalFileHeader() 111 | return err 112 | }, 113 | EndLine: "@@ -1,2 +1,3 @@\n", 114 | }, 115 | "ParseTextFragmentHeader": { 116 | Input: `@@ -1,2 +1,3 @@ 117 | context line 118 | `, 119 | Parse: func(p *parser) error { 120 | _, err := p.ParseTextFragmentHeader() 121 | return err 122 | }, 
123 | EndLine: "context line\n", 124 | }, 125 | "ParseTextChunk": { 126 | Input: ` context line 127 | -old line 128 | +new line 129 | context line 130 | @@ -1 +1 @@ 131 | `, 132 | Parse: func(p *parser) error { 133 | return p.ParseTextChunk(&TextFragment{OldLines: 3, NewLines: 3}) 134 | }, 135 | EndLine: "@@ -1 +1 @@\n", 136 | }, 137 | "ParseTextFragments": { 138 | Input: `@@ -1,2 +1,2 @@ 139 | context line 140 | -old line 141 | +new line 142 | @@ -1,2 +1,2 @@ 143 | -old line 144 | +new line 145 | context line 146 | diff --git a/file.txt b/file.txt 147 | `, 148 | Parse: func(p *parser) error { 149 | _, err := p.ParseTextFragments(&File{}) 150 | return err 151 | }, 152 | EndLine: "diff --git a/file.txt b/file.txt\n", 153 | }, 154 | "ParseNextFileHeader": { 155 | Input: `not a header 156 | diff --git a/file.txt b/file.txt 157 | --- a/file.txt 158 | +++ b/file.txt 159 | @@ -1,2 +1,2 @@ 160 | `, 161 | Parse: func(p *parser) error { 162 | _, _, err := p.ParseNextFileHeader() 163 | return err 164 | }, 165 | EndLine: "@@ -1,2 +1,2 @@\n", 166 | }, 167 | "ParseBinaryMarker": { 168 | Input: `Binary files differ 169 | diff --git a/file.txt b/file.txt 170 | `, 171 | Parse: func(p *parser) error { 172 | _, _, err := p.ParseBinaryMarker() 173 | return err 174 | }, 175 | EndLine: "diff --git a/file.txt b/file.txt\n", 176 | }, 177 | "ParseBinaryFragmentHeader": { 178 | Input: `literal 0 179 | HcmV?d00001 180 | `, 181 | Parse: func(p *parser) error { 182 | _, err := p.ParseBinaryFragmentHeader() 183 | return err 184 | }, 185 | EndLine: "HcmV?d00001\n", 186 | }, 187 | "ParseBinaryChunk": { 188 | Input: "TcmZQzU|?i`" + `U?w2V48*Je09XJG 189 | 190 | literal 0 191 | `, 192 | Parse: func(p *parser) error { 193 | return p.ParseBinaryChunk(&BinaryFragment{Size: 20}) 194 | }, 195 | EndLine: "literal 0\n", 196 | }, 197 | "ParseBinaryFragments": { 198 | Input: `GIT binary patch 199 | literal 40 200 | gcmZQzU|?i` + "`" + `U?w2V48*KJ%mKu_Kr9NxN 240 | Date: Tue Apr 2 22:30:00 2019 -0700 241 | 
242 | This is a sample commit message. 243 | 244 | diff --git a/file.txt b/file.txt 245 | index cc34da1..1acbae5 100644 246 | --- a/file.txt 247 | +++ b/file.txt 248 | @@ -1,3 +1,4 @@ 249 | `, 250 | Output: &File{ 251 | OldName: "file.txt", 252 | NewName: "file.txt", 253 | OldMode: os.FileMode(0100644), 254 | OldOIDPrefix: "cc34da1", 255 | NewOIDPrefix: "1acbae5", 256 | }, 257 | Preamble: `commit 1acbae563cd6ef5750a82ee64e116c6eb065cb94 258 | Author: Morton Haypenny 259 | Date: Tue Apr 2 22:30:00 2019 -0700 260 | 261 | This is a sample commit message. 262 | 263 | `, 264 | }, 265 | "traditionalHeader": { 266 | Input: ` 267 | --- file.txt 2019-04-01 22:58:14.833597918 -0700 268 | +++ file.txt 2019-04-01 22:58:14.833597918 -0700 269 | @@ -1,3 +1,4 @@ 270 | `, 271 | Output: &File{ 272 | OldName: "file.txt", 273 | NewName: "file.txt", 274 | }, 275 | Preamble: "\n", 276 | }, 277 | "noHeaders": { 278 | Input: ` 279 | this is a line 280 | this is another line 281 | --- could this be a header? 282 | nope, it's just some dashes 283 | `, 284 | Output: nil, 285 | Preamble: ` 286 | this is a line 287 | this is another line 288 | --- could this be a header? 289 | nope, it's just some dashes 290 | `, 291 | }, 292 | "detatchedFragmentLike": { 293 | Input: ` 294 | a wild fragment appears? 295 | @@ -1,3 +1,4 ~1,5 @@ 296 | `, 297 | Output: nil, 298 | Preamble: ` 299 | a wild fragment appears? 300 | @@ -1,3 +1,4 ~1,5 @@ 301 | `, 302 | }, 303 | "detatchedFragment": { 304 | Input: ` 305 | a wild fragment appears? 
306 | @@ -1,3 +1,4 @@ 307 | `, 308 | Err: true, 309 | }, 310 | } 311 | 312 | for name, test := range tests { 313 | t.Run(name, func(t *testing.T) { 314 | p := newTestParser(test.Input, true) 315 | 316 | f, pre, err := p.ParseNextFileHeader() 317 | if test.Err { 318 | if err == nil || err == io.EOF { 319 | t.Fatalf("expected error parsing next file header, but got %v", err) 320 | } 321 | return 322 | } 323 | if err != nil { 324 | t.Fatalf("unexpected error parsing next file header: %v", err) 325 | } 326 | 327 | if test.Preamble != pre { 328 | t.Errorf("incorrect preamble\nexpected: %q\n actual: %q", test.Preamble, pre) 329 | } 330 | if !reflect.DeepEqual(test.Output, f) { 331 | t.Errorf("incorrect file\nexpected: %+v\n actual: %+v", test.Output, f) 332 | } 333 | }) 334 | } 335 | } 336 | 337 | func TestParse(t *testing.T) { 338 | textFragments := []*TextFragment{ 339 | { 340 | OldPosition: 3, 341 | OldLines: 6, 342 | NewPosition: 3, 343 | NewLines: 8, 344 | Comment: "fragment 1", 345 | Lines: []Line{ 346 | {OpContext, "context line\n"}, 347 | {OpDelete, "old line 1\n"}, 348 | {OpDelete, "old line 2\n"}, 349 | {OpContext, "context line\n"}, 350 | {OpAdd, "new line 1\n"}, 351 | {OpAdd, "new line 2\n"}, 352 | {OpAdd, "new line 3\n"}, 353 | {OpContext, "context line\n"}, 354 | {OpDelete, "old line 3\n"}, 355 | {OpAdd, "new line 4\n"}, 356 | {OpAdd, "new line 5\n"}, 357 | }, 358 | LinesAdded: 5, 359 | LinesDeleted: 3, 360 | LeadingContext: 1, 361 | }, 362 | { 363 | OldPosition: 31, 364 | OldLines: 2, 365 | NewPosition: 33, 366 | NewLines: 2, 367 | Comment: "fragment 2", 368 | Lines: []Line{ 369 | {OpContext, "context line\n"}, 370 | {OpDelete, "old line 4\n"}, 371 | {OpAdd, "new line 6\n"}, 372 | }, 373 | LinesAdded: 1, 374 | LinesDeleted: 1, 375 | LeadingContext: 1, 376 | }, 377 | } 378 | 379 | textPreamble := `commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe 380 | Author: Morton Haypenny 381 | Date: Tue Apr 2 22:55:40 2019 -0700 382 | 383 | A file with multiple 
fragments. 384 | 385 | The content is arbitrary. 386 | 387 | ` 388 | 389 | binaryPreamble := `commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe 390 | Author: Morton Haypenny 391 | Date: Tue Apr 2 22:55:40 2019 -0700 392 | 393 | A binary file with the first 10 fibonacci numbers. 394 | 395 | ` 396 | tests := map[string]struct { 397 | InputFile string 398 | Output []*File 399 | Preamble string 400 | Err bool 401 | }{ 402 | "oneFile": { 403 | InputFile: "testdata/one_file.patch", 404 | Output: []*File{ 405 | { 406 | OldName: "dir/file1.txt", 407 | NewName: "dir/file1.txt", 408 | OldMode: os.FileMode(0100644), 409 | OldOIDPrefix: "ebe9fa54", 410 | NewOIDPrefix: "fe103e1d", 411 | TextFragments: textFragments, 412 | }, 413 | }, 414 | Preamble: textPreamble, 415 | }, 416 | "twoFiles": { 417 | InputFile: "testdata/two_files.patch", 418 | Output: []*File{ 419 | { 420 | OldName: "dir/file1.txt", 421 | NewName: "dir/file1.txt", 422 | OldMode: os.FileMode(0100644), 423 | OldOIDPrefix: "ebe9fa54", 424 | NewOIDPrefix: "fe103e1d", 425 | TextFragments: textFragments, 426 | }, 427 | { 428 | OldName: "dir/file2.txt", 429 | NewName: "dir/file2.txt", 430 | OldMode: os.FileMode(0100644), 431 | OldOIDPrefix: "417ebc70", 432 | NewOIDPrefix: "67514b7f", 433 | TextFragments: textFragments, 434 | }, 435 | }, 436 | Preamble: textPreamble, 437 | }, 438 | "noFiles": { 439 | InputFile: "testdata/no_files.patch", 440 | Output: nil, 441 | Preamble: textPreamble, 442 | }, 443 | "newBinaryFile": { 444 | InputFile: "testdata/new_binary_file.patch", 445 | Output: []*File{ 446 | { 447 | OldName: "", 448 | NewName: "dir/ten.bin", 449 | NewMode: os.FileMode(0100644), 450 | OldOIDPrefix: "0000000000000000000000000000000000000000", 451 | NewOIDPrefix: "77b068ba48c356156944ea714740d0d5ca07bfec", 452 | IsNew: true, 453 | IsBinary: true, 454 | BinaryFragment: &BinaryFragment{ 455 | Method: BinaryPatchLiteral, 456 | Size: 40, 457 | Data: fib(10, binary.BigEndian), 458 | }, 459 | ReverseBinaryFragment: 
&BinaryFragment{ 460 | Method: BinaryPatchLiteral, 461 | Size: 0, 462 | Data: []byte{}, 463 | }, 464 | }, 465 | }, 466 | Preamble: binaryPreamble, 467 | }, 468 | } 469 | 470 | for name, test := range tests { 471 | t.Run(name, func(t *testing.T) { 472 | f, err := os.Open(test.InputFile) 473 | if err != nil { 474 | t.Fatalf("unexpected error opening input file: %v", err) 475 | } 476 | 477 | files, pre, err := Parse(f) 478 | if test.Err { 479 | if err == nil || err == io.EOF { 480 | t.Fatalf("expected error parsing patch, but got %v", err) 481 | } 482 | return 483 | } 484 | if err != nil { 485 | t.Fatalf("unexpected error parsing patch: %v", err) 486 | } 487 | 488 | if len(test.Output) != len(files) { 489 | t.Fatalf("incorrect number of parsed files: expected %d, actual %d", len(test.Output), len(files)) 490 | } 491 | if test.Preamble != pre { 492 | t.Errorf("incorrect preamble\nexpected: %q\n actual: %q", test.Preamble, pre) 493 | } 494 | for i := range test.Output { 495 | if !reflect.DeepEqual(test.Output[i], files[i]) { 496 | exp, _ := json.MarshalIndent(test.Output[i], "", " ") 497 | act, _ := json.MarshalIndent(files[i], "", " ") 498 | t.Errorf("incorrect file at position %d\nexpected: %s\n actual: %s", i, exp, act) 499 | } 500 | } 501 | }) 502 | } 503 | } 504 | 505 | func newTestParser(input string, init bool) *parser { 506 | p := newParser(bytes.NewBufferString(input)) 507 | if init { 508 | _ = p.Next() 509 | } 510 | return p 511 | } 512 | -------------------------------------------------------------------------------- /gitdiff/patch_header.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "bufio" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "mime/quotedprintable" 10 | "net/mail" 11 | "strconv" 12 | "strings" 13 | "time" 14 | "unicode" 15 | ) 16 | 17 | const ( 18 | mailHeaderPrefix = "From " 19 | prettyHeaderPrefix = "commit " 20 | mailMinimumHeaderPrefix = "From:" 21 | ) 22 | 23 
| // PatchHeader is a parsed version of the preamble content that appears before 24 | // the first diff in a patch. It includes metadata about the patch, such as the 25 | // author and a subject. 26 | type PatchHeader struct { 27 | // The SHA of the commit the patch was generated from. Empty if the SHA is 28 | // not included in the header. 29 | SHA string 30 | 31 | // The author details of the patch. If these details are not included in 32 | // the header, Author is nil and AuthorDate is the zero time. 33 | Author *PatchIdentity 34 | AuthorDate time.Time 35 | 36 | // The committer details of the patch. If these details are not included in 37 | // the header, Committer is nil and CommitterDate is the zero time. 38 | Committer *PatchIdentity 39 | CommitterDate time.Time 40 | 41 | // The title and body of the commit message describing the changes in the 42 | // patch. Empty if no message is included in the header. 43 | Title string 44 | Body string 45 | 46 | // If the preamble looks like an email, ParsePatchHeader will 47 | // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the 48 | // Title and place them here. 49 | SubjectPrefix string 50 | 51 | // If the preamble looks like an email, and it contains a `---` 52 | // line, that line will be removed and everything after it will be 53 | // placed in BodyAppendix. 54 | BodyAppendix string 55 | } 56 | 57 | // Message returns the commit message for the header. The message consists of 58 | // the title and the body separated by an empty line. 59 | func (h *PatchHeader) Message() string { 60 | var msg strings.Builder 61 | if h != nil { 62 | msg.WriteString(h.Title) 63 | if h.Body != "" { 64 | msg.WriteString("\n\n") 65 | msg.WriteString(h.Body) 66 | } 67 | } 68 | return msg.String() 69 | } 70 | 71 | // ParsePatchDate parses a patch date string. It returns the parsed time or an 72 | // error if s has an unknown format. 
// ParsePatchDate converts a patch date string into a time. It understands the
// iso, rfc, short, raw, unix, and default formats (with local variants) that
// Git produces for its --date flag. An empty string yields the zero time and
// no error; a string in none of the known formats yields an error.
//
// Formats that carry no zone information are interpreted in time.Local.
func ParsePatchDate(s string) (time.Time, error) {
	if s == "" {
		return time.Time{}, nil
	}

	// Layout-based formats, tried in this order.
	layouts := [...]string{
		"2006-01-02 15:04:05 -0700",      // iso
		"2006-01-02T15:04:05-07:00",      // iso-strict
		"Mon, 2 Jan 2006 15:04:05 -0700", // rfc2822
		"2006-01-02",                     // short
		"Mon Jan 2 15:04:05 2006 -0700",  // default
		"Mon Jan 2 15:04:05 2006",        // default-local
	}
	for _, layout := range layouts {
		parsed, err := time.ParseInLocation(layout, s, time.Local)
		if err == nil {
			return parsed, nil
		}
	}

	// unix format: seconds since the epoch
	if sec, err := strconv.ParseInt(s, 10, 64); err == nil {
		return time.Unix(sec, 0), nil
	}

	// raw format: seconds since the epoch, a space, and a zone offset
	if sep := strings.IndexByte(s, ' '); sep > 0 {
		sec, secErr := strconv.ParseInt(s[:sep], 10, 64)
		zone, zoneErr := time.Parse("-0700", s[sep+1:])
		if secErr == nil && zoneErr == nil {
			return time.Unix(sec, 0).In(zone.Location()), nil
		}
	}

	return time.Time{}, fmt.Errorf("unknown date format: %s", s)
}
132 | // This is the default behavior of git (see `git mailinfo`) and this 133 | // package. 134 | SubjectCleanAll 135 | 136 | // SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes 137 | // leading strings enclosed by '[' and ']' if they start with "PATCH". 138 | SubjectCleanPatchOnly 139 | ) 140 | 141 | // WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By 142 | // default, uses SubjectCleanAll. 143 | func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption { 144 | return func(opts *patchHeaderOptions) { 145 | opts.subjectCleanMode = m 146 | } 147 | } 148 | 149 | type patchHeaderOptions struct { 150 | subjectCleanMode SubjectCleanMode 151 | } 152 | 153 | // ParsePatchHeader parses the preamble string returned by [Parse] into a 154 | // PatchHeader. Due to the variety of header formats, some fields of the parsed 155 | // PatchHeader may be unset after parsing. 156 | // 157 | // Supported formats are the short, medium, full, fuller, and email pretty 158 | // formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox 159 | // format used by `git format-patch`. 160 | // 161 | // When parsing mail-formatted headers, ParsePatchHeader tries to remove 162 | // email-specific content from the title and body: 163 | // 164 | // - Based on the SubjectCleanMode, remove prefixes like reply markers and 165 | // "[PATCH]" strings from the subject, saving any removed content in the 166 | // SubjectPrefix field. Parsing always discards leading and trailing 167 | // whitespace from the subject line. The default mode is SubjectCleanAll. 168 | // 169 | // - If the body contains a "---" line (3 hyphens), remove that line and any 170 | // content after it from the body and save it in the BodyAppendix field. 
171 | // 172 | // ParsePatchHeader tries to process content it does not understand wthout 173 | // returning errors, but will return errors if well-identified content like 174 | // dates or identies uses unknown or invalid formats. 175 | func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) { 176 | opts := patchHeaderOptions{ 177 | subjectCleanMode: SubjectCleanAll, // match git defaults 178 | } 179 | for _, optFn := range options { 180 | optFn(&opts) 181 | } 182 | 183 | header = strings.TrimSpace(header) 184 | if header == "" { 185 | return &PatchHeader{}, nil 186 | } 187 | 188 | var firstLine, rest string 189 | if idx := strings.IndexByte(header, '\n'); idx >= 0 { 190 | firstLine = header[:idx] 191 | rest = header[idx+1:] 192 | } else { 193 | firstLine = header 194 | rest = "" 195 | } 196 | 197 | switch { 198 | case strings.HasPrefix(firstLine, mailHeaderPrefix): 199 | return parseHeaderMail(firstLine, strings.NewReader(rest), opts) 200 | 201 | case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): 202 | // With a minimum header, the first line is part of the actual mail 203 | // content and needs to be parsed as part of the "rest" 204 | return parseHeaderMail("", strings.NewReader(header), opts) 205 | 206 | case strings.HasPrefix(firstLine, prettyHeaderPrefix): 207 | return parseHeaderPretty(firstLine, strings.NewReader(rest)) 208 | } 209 | 210 | return nil, errors.New("unrecognized patch header format") 211 | } 212 | 213 | func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { 214 | const ( 215 | authorPrefix = "Author:" 216 | commitPrefix = "Commit:" 217 | datePrefix = "Date:" 218 | authorDatePrefix = "AuthorDate:" 219 | commitDatePrefix = "CommitDate:" 220 | ) 221 | 222 | h := &PatchHeader{} 223 | 224 | prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix) 225 | if i := strings.IndexByte(prettyLine, ' '); i > 0 { 226 | h.SHA = prettyLine[:i] 227 | } else { 228 | h.SHA = prettyLine 229 | 
} 230 | 231 | s := bufio.NewScanner(r) 232 | for s.Scan() { 233 | line := s.Text() 234 | 235 | // empty line marks end of fields, remaining lines are title/message 236 | if strings.TrimSpace(line) == "" { 237 | break 238 | } 239 | 240 | switch { 241 | case strings.HasPrefix(line, authorPrefix): 242 | u, err := ParsePatchIdentity(line[len(authorPrefix):]) 243 | if err != nil { 244 | return nil, err 245 | } 246 | h.Author = &u 247 | 248 | case strings.HasPrefix(line, commitPrefix): 249 | u, err := ParsePatchIdentity(line[len(commitPrefix):]) 250 | if err != nil { 251 | return nil, err 252 | } 253 | h.Committer = &u 254 | 255 | case strings.HasPrefix(line, datePrefix): 256 | d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):])) 257 | if err != nil { 258 | return nil, err 259 | } 260 | h.AuthorDate = d 261 | 262 | case strings.HasPrefix(line, authorDatePrefix): 263 | d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):])) 264 | if err != nil { 265 | return nil, err 266 | } 267 | h.AuthorDate = d 268 | 269 | case strings.HasPrefix(line, commitDatePrefix): 270 | d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):])) 271 | if err != nil { 272 | return nil, err 273 | } 274 | h.CommitterDate = d 275 | } 276 | } 277 | if s.Err() != nil { 278 | return nil, s.Err() 279 | } 280 | 281 | title, indent := scanMessageTitle(s) 282 | if s.Err() != nil { 283 | return nil, s.Err() 284 | } 285 | h.Title = title 286 | 287 | if title != "" { 288 | // Don't check for an appendix, pretty headers do not contain them 289 | body, _ := scanMessageBody(s, indent, false) 290 | if s.Err() != nil { 291 | return nil, s.Err() 292 | } 293 | h.Body = body 294 | } 295 | 296 | return h, nil 297 | } 298 | 299 | func scanMessageTitle(s *bufio.Scanner) (title string, indent string) { 300 | var b strings.Builder 301 | for i := 0; s.Scan(); i++ { 302 | line := s.Text() 303 | trimLine := strings.TrimSpace(line) 304 | if trimLine == "" { 305 | break 306 | } 307 
// scanMessageBody reads the remaining lines from s and returns the message
// body and, when separateAppendix is true, the appendix that follows the
// first line consisting only of "---".
//
// Trailing whitespace is removed from every line and indent is stripped from
// the start of each line that has it. Blank lines before the first content
// line are dropped, and any run of blank lines between content lines becomes
// a single blank line. Once the appendix has started, a later "---" line is
// kept as ordinary appendix text.
//
// Scanner errors are not returned; callers check s.Err() afterwards.
func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) {
	var body, appendix strings.Builder
	out := &body

	// pendingBlank records that blank lines were seen since the last content
	// line was written.
	pendingBlank := false

	for s.Scan() {
		line := strings.TrimRightFunc(s.Text(), unicode.IsSpace)
		line = strings.TrimPrefix(line, indent)

		if line == "" {
			pendingBlank = true
			continue
		}

		// If requested, split off "appendix" information (often added by
		// `git format-patch` and removed by `git am`).
		if separateAppendix && out == &body && line == "---" {
			out = &appendix
			continue
		}

		if out.Len() > 0 {
			out.WriteByte('\n')
			if pendingBlank {
				out.WriteByte('\n')
			}
		}
		pendingBlank = false

		out.WriteString(line)
	}
	return body.String(), appendix.String()
}
| } 387 | h.AuthorDate = d 388 | } 389 | 390 | subject := msg.Header.Get("Subject") 391 | h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode) 392 | 393 | s := bufio.NewScanner(msg.Body) 394 | h.Body, h.BodyAppendix = scanMessageBody(s, "", true) 395 | if s.Err() != nil { 396 | return nil, s.Err() 397 | } 398 | 399 | return h, nil 400 | } 401 | 402 | func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) { 403 | switch mode { 404 | case SubjectCleanAll, SubjectCleanPatchOnly: 405 | case SubjectCleanWhitespace: 406 | return "", strings.TrimSpace(decodeSubject(s)) 407 | default: 408 | panic(fmt.Sprintf("unknown clean mode: %d", mode)) 409 | } 410 | 411 | // Based on the algorithm from Git in mailinfo.c:cleanup_subject() 412 | // If compatibility with `git am` drifts, go there to see if there are any updates. 413 | 414 | at := 0 415 | for at < len(s) { 416 | switch s[at] { 417 | case 'r', 'R': 418 | // Detect re:, Re:, rE: and RE: 419 | if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' { 420 | at += 3 421 | continue 422 | } 423 | 424 | case ' ', '\t', ':': 425 | // Delete whitespace and duplicate ':' characters 426 | at++ 427 | continue 428 | 429 | case '[': 430 | if i := strings.IndexByte(s[at:], ']'); i > 0 { 431 | if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") { 432 | at += i + 1 433 | continue 434 | } 435 | } 436 | } 437 | 438 | // Nothing was removed, end processing 439 | break 440 | } 441 | 442 | prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace) 443 | subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace) 444 | return 445 | } 446 | 447 | // Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result 448 | // of a `git format-patch` when the commit title has a non-ASCII character (i.e. an emoji). 
// decodeSubject decodes a subject line that starts with a quoted-printable
// UTF-8 encoded word ("=?UTF-8?q?"), which is what `git format-patch` emits
// when the commit title has a non-ASCII character (i.e. an emoji). Any other
// input is returned unchanged, as is input that fails to decode.
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
func decodeSubject(encoded string) string {
	const (
		wordStart = "=?UTF-8?q?"
		wordEnd   = "?="
	)

	if !strings.HasPrefix(encoded, wordStart) {
		// not UTF-8 encoded
		return encoded
	}

	// If the subject is too long, `git format-patch` may split it across
	// multiple lines. When parsed, this can look like the following:
	//
	//	=?UTF-8?q?first-part?= =?UTF-8?q?second-part?=
	//
	// Prepending a space lets the first word start be removed together with
	// the space-separated continuation starts.
	joined := strings.ReplaceAll(" "+encoded, " "+wordStart, "")
	joined = strings.ReplaceAll(joined, wordEnd, "")

	raw, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(joined)))
	if err != nil {
		// if err, abort decoding and return original subject
		return encoded
	}
	return string(raw)
}
It 32 | // otherwise does not validate the result. 33 | // 34 | // [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 35 | func ParsePatchIdentity(s string) (PatchIdentity, error) { 36 | s = normalizeSpace(s) 37 | s = unquotePairs(s) 38 | 39 | var name, email string 40 | if at := strings.IndexByte(s, '@'); at >= 0 { 41 | start, end := at, at 42 | for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { 43 | start-- 44 | } 45 | for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { 46 | end++ 47 | } 48 | email = s[start+1 : end] 49 | 50 | // Adjust the boundaries so that we drop angle brackets, but keep 51 | // spaces when removing the email to form the name. 52 | if start < 0 || s[start] != '<' { 53 | start++ 54 | } 55 | if end >= len(s) || s[end] != '>' { 56 | end-- 57 | } 58 | name = s[:start] + s[end+1:] 59 | } else { 60 | start, end := 0, 0 61 | for i := 0; i < len(s); i++ { 62 | if s[i] == '<' && start == 0 { 63 | start = i + 1 64 | } 65 | if s[i] == '>' && start > 0 { 66 | end = i 67 | break 68 | } 69 | } 70 | if start > 0 && end >= start { 71 | email = strings.TrimSpace(s[start:end]) 72 | name = s[:start-1] 73 | } 74 | } 75 | 76 | // After extracting the email, the name might contain extra whitespace 77 | // again and may be surrounded by comment characters. 
The git source gives 78 | // these examples of when this can happen: 79 | // 80 | // "Name " 81 | // "email@domain (Name)" 82 | // "Name (Comment)" 83 | // 84 | name = normalizeSpace(name) 85 | if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { 86 | name = name[1 : len(name)-1] 87 | } 88 | name = strings.TrimSpace(name) 89 | 90 | // If the name is empty or contains email-like characters, use the email 91 | // instead (assuming one exists) 92 | if name == "" || strings.ContainsAny(name, "@<>") { 93 | name = email 94 | } 95 | 96 | if name == "" && email == "" { 97 | return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) 98 | } 99 | return PatchIdentity{Name: name, Email: email}, nil 100 | } 101 | 102 | // unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to 103 | // remove any "quoted-pairs" (backslash-espaced characters). It also removes 104 | // the quotes from any quoted strings, but leaves the comment delimiters. 105 | func unquotePairs(s string) string { 106 | quote := false 107 | comments := 0 108 | escaped := false 109 | 110 | var out strings.Builder 111 | for i := 0; i < len(s); i++ { 112 | if escaped { 113 | escaped = false 114 | } else { 115 | switch s[i] { 116 | case '\\': 117 | // quoted-pair is only allowed in quoted-string/comment 118 | if quote || comments > 0 { 119 | escaped = true 120 | continue // drop '\' character 121 | } 122 | 123 | case '"': 124 | if comments == 0 { 125 | quote = !quote 126 | continue // drop '"' character 127 | } 128 | 129 | case '(': 130 | if !quote { 131 | comments++ 132 | } 133 | case ')': 134 | if comments > 0 { 135 | comments-- 136 | } 137 | } 138 | } 139 | out.WriteByte(s[i]) 140 | } 141 | return out.String() 142 | } 143 | 144 | // normalizeSpace trims leading and trailing whitespace from s and converts 145 | // inner sequences of one or more whitespace characters to single spaces. 
// normalizeSpace removes leading and trailing whitespace from s and collapses
// every inner run of whitespace into a single space. Only the characters
// accepted by isRFC5332Space count as whitespace.
func normalizeSpace(s string) string {
	// Restrict the check to ASCII so that a multi-byte rune is never
	// truncated into one of the whitespace bytes.
	isSpace := func(r rune) bool {
		return r < 0x80 && isRFC5332Space(byte(r))
	}
	return strings.Join(strings.FieldsFunc(s, isSpace), " ")
}

// isRFC5332Space reports whether c is a tab, line feed, carriage return or
// space.
func isRFC5332Space(c byte) bool {
	return c == '\t' || c == '\n' || c == '\r' || c == ' '
}
"rfc5322QuotedPairs": { 56 | Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`, 57 | Output: PatchIdentity{ 58 | Name: `Morton "Old-Timer" Haypenny (III (PhD))`, 59 | Email: "mhaypenny+[1900]@example.com", 60 | }, 61 | }, 62 | "rfc5322QuotedPairsOutOfContext": { 63 | Input: `Morton \\Backslash Haypenny `, 64 | Output: PatchIdentity{ 65 | Name: `Morton \\Backslash Haypenny`, 66 | Email: "mhaypenny@example.com", 67 | }, 68 | }, 69 | "emptyEmail": { 70 | Input: "Morton Haypenny <>", 71 | Output: PatchIdentity{ 72 | Name: "Morton Haypenny", 73 | Email: "", 74 | }, 75 | }, 76 | "unclosedEmail": { 77 | Input: "Morton Haypenny ", 85 | Output: PatchIdentity{ 86 | Name: "Morton Haypenny", 87 | Email: "mhaypenny", 88 | }, 89 | }, 90 | "bogusEmailWithWhitespace": { 91 | Input: "Morton Haypenny < mhaypenny >", 92 | Output: PatchIdentity{ 93 | Name: "Morton Haypenny", 94 | Email: "mhaypenny", 95 | }, 96 | }, 97 | "missingEmail": { 98 | Input: "Morton Haypenny", 99 | Err: "invalid identity", 100 | }, 101 | "missingNameAndEmptyEmail": { 102 | Input: "<>", 103 | Err: "invalid identity", 104 | }, 105 | "empty": { 106 | Input: "", 107 | Err: "invalid identity", 108 | }, 109 | } 110 | 111 | for name, test := range tests { 112 | t.Run(name, func(t *testing.T) { 113 | id, err := ParsePatchIdentity(test.Input) 114 | if test.Err != nil { 115 | assertError(t, test.Err, err, "parsing identity") 116 | return 117 | } 118 | if err != nil { 119 | t.Fatalf("unexpected error parsing identity: %v", err) 120 | } 121 | 122 | if test.Output != id { 123 | t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) 124 | } 125 | }) 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin.go: -------------------------------------------------------------------------------- 1 | //go:build ignore 2 | 3 | // bin.go is a helper CLI to manipulate binary diff data for testing 
var (
	// b85Powers holds the place values of the five base85 digits, most
	// significant first.
	b85Powers = []uint32{52200625, 614125, 7225, 85, 1}

	// b85Alpha maps a digit value (0-84) to its character.
	b85Alpha = []byte(
		"0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "!#$%&()*+-;<=>?@^_`{|}~",
	)
)

var mode string

// base85Encode encodes data with the alphabet in b85Alpha. The input is
// treated as a sequence of big-endian 32-bit words, zero-padded to a multiple
// of four bytes, and every word becomes five output characters.
//
// The input slice is never written to, including any spare capacity beyond
// its length.
func base85Encode(data []byte) []byte {
	chunks := (len(data) + 3) / 4

	// Pad in a private copy: appending to data could overwrite bytes in the
	// caller's backing array when the slice has unused capacity.
	padded := make([]byte, 4*chunks)
	copy(padded, data)

	out := make([]byte, 0, 5*chunks)
	for i := 0; i < len(padded); i += 4 {
		v := binary.BigEndian.Uint32(padded[i : i+4])
		for _, power := range b85Powers {
			digit := v / power
			out = append(out, b85Alpha[digit])
			v -= power * digit
		}
	}
	return out
}

// compress returns data compressed as a zlib stream.
func compress(data []byte) ([]byte, error) {
	var b bytes.Buffer
	w := zlib.NewWriter(&b)

	if _, err := w.Write(data); err != nil {
		return nil, err
	}
	// Close flushes the remaining compressed bytes into b.
	if err := w.Close(); err != nil {
		return nil, err
	}

	return b.Bytes(), nil
}

// wrap splits base85 text into the lines of a binary patch body. Every line
// starts with a character giving the number of bytes the line encodes
// ('A'-'Z' for 1-26, 'a'-'z' for 27-52), followed by the base85 characters
// and a newline.
//
// A line carries at most 52 bytes, which is 65 base85 characters, so the text
// is split every 65 characters; this keeps each five-character group intact
// and the length character consistent with the characters that follow it.
//
// NOTE(review): the byte count is derived from the number of characters, so
// for the last line it includes any zero padding added by base85Encode.
func wrap(data []byte) string {
	const (
		maxLineBytes = 52
		maxLineChars = maxLineBytes / 4 * 5
	)

	var s strings.Builder
	for i := 0; i < len(data); i += maxLineChars {
		c := maxLineChars
		if c > len(data)-i {
			c = len(data) - i
		}
		b := (c / 5) * 4

		if b <= 26 {
			s.WriteByte(byte('A' + b - 1))
		} else {
			s.WriteByte(byte('a' + b - 27))
		}
		s.Write(data[i : i+c])
		s.WriteByte('\n')
	}
	return s.String()
}
"operation mode, one of 'parse' or 'encode'") 91 | } 92 | 93 | func main() { 94 | flag.Parse() 95 | 96 | switch mode { 97 | case "parse": 98 | files, _, err := gitdiff.Parse(os.Stdin) 99 | if err != nil { 100 | log.Fatalf("failed to parse file: %v", err) 101 | } 102 | if len(files) != 1 { 103 | log.Fatalf("patch contains more than one file: %d", len(files)) 104 | } 105 | if files[0].BinaryFragment == nil { 106 | log.Fatalf("patch file does not contain a binary fragment") 107 | } 108 | os.Stdout.Write(files[0].BinaryFragment.Data) 109 | 110 | case "encode": 111 | data, err := ioutil.ReadAll(os.Stdin) 112 | if err != nil { 113 | log.Fatalf("failed to read input: %v", err) 114 | } 115 | data, err = compress(data) 116 | if err != nil { 117 | log.Fatalf("failed to compress data: %v", err) 118 | } 119 | os.Stdout.WriteString(wrap(base85Encode(data))) 120 | 121 | default: 122 | log.Fatalf("unknown mode: %s", mode) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_delta_error.src: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluekeyes/go-gitdiff/HEAD/gitdiff/testdata/apply/bin_fragment_delta_error.src -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_delta_error_dst_size.patch: -------------------------------------------------------------------------------- 1 | diff --git a/gitdiff/testdata/apply/bin_fragment_delta_error.src b/gitdiff/testdata/apply/bin_fragment_delta_error.src 2 | GIT binary patch 3 | delta 18 4 | fc${itY+{<=z`_4AtEhVK$zKyatN;N30RR6$D+j^= 5 | 6 | -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_delta_error_incomplete_add.patch: -------------------------------------------------------------------------------- 1 | diff --git 
a/gitdiff/testdata/apply/bin_fragment_delta_error.src b/gitdiff/testdata/apply/bin_fragment_delta_error.src 2 | GIT binary patch 3 | delta 11 4 | Xc${itY+{_?z`_4As|XMP0RR6K8UwQc 5 | 6 | -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_delta_error_incomplete_copy.patch: -------------------------------------------------------------------------------- 1 | diff --git a/gitdiff/testdata/apply/bin_fragment_delta_error.src b/gitdiff/testdata/apply/bin_fragment_delta_error.src 2 | GIT binary patch 3 | delta 17 4 | fc${itY+{_?z`_4AtEhVK$zKya00961|Nl5!2ZsOv 5 | 6 | -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_delta_error_src_size.patch: -------------------------------------------------------------------------------- 1 | diff --git a/gitdiff/testdata/apply/bin_fragment_delta_error.src b/gitdiff/testdata/apply/bin_fragment_delta_error.src 2 | GIT binary patch 3 | delta 18 4 | fc${itYGRz=z`_4AtEhVK$zKyatN;N30RR6$EeFB? 
5 | 6 | -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_delta_modify.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluekeyes/go-gitdiff/HEAD/gitdiff/testdata/apply/bin_fragment_delta_modify.out -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_delta_modify.patch: -------------------------------------------------------------------------------- 1 | diff --git a/gitdiff/testdata/apply/bin_fragment_delta_modify.src b/gitdiff/testdata/apply/bin_fragment_delta_modify.src 2 | GIT binary patch 3 | delta 172 4 | zcmV;d08{^f2)qc8AP{I3VQ>J`s>wb0HU+h#6w8q?tUO~cHmDjZi2<8yZ9XmKhhMdo 5 | zWu(4bg|8QwzZ|1e*rL4P#)`FenXTQ5=J2y;^BfB}4 11 | zWkisH791|vOVl5e-@^VLX0s~Ky_UyN!3;CgPr>Edj0j+0gOSwSsFsr$0q6zUJphJCY 5 | zqz)g`N&ViW9W6D7kX7GX{y{m=JvFi1-r`PEE?SE&^2StK}qz5hC 7 | zom0ywdCWuNb#YXdd0%FMNHft!FTER$>uzu5gxdoGBy789raJBW7jAhN2TWFL{P%2l 8 | z|AX{}Jz8U}Y*X|~=?4<;F4)94!-e?w)#D0h8n1_ORWNCOC&7=!U0MP3BI0~M)pZ-cf6aFkVFzO&JOkcv7FeEq|)DcBDAP&_&ZgBk* 11 | zVp(I^5-bN3L{~g{bHnkWX%0Hj02~njkKX8Zz%Ih#=LBD%Pk%TkW4ize_HweE#@_-2 12 | zvpyE#e@^n#rRGx;O84LB3bMdrxdv%Gkc)ZQq%8pkxT9b*)}Z&t5bibZ0)8H8T33vN 13 | zgTj)j_%wz13x+TZ6LgdupD^ke2!n7E-YZ%8n3OTzK5*T(BH>ltfU|QJ7VTAur}ah?0sC%ZUY?&xwkEUw@Y9LqO1~eF>>5cDlg_YxuF| 17 | zZ58JVlu7IBfP1@TDmG;<{X>(&*HyC-7zqRrY~#3W>hP8@a&jwYi_$k(j`D$Ta97 19 | z{(rL4B7Ysa#+5QC40p?w_V}r&G%{W$)8R1P`wet5K>`4D&EanFW{d2mOBvm~hJviTXh@a)Qp^TpF9X}vEVN-{LA%5Xp^*>%23c&{6HgPH4ahb@257yLNuno+(_9IJ!cgw97b*@K~X-9f8rdsd` 26 | zOSj362)!mBsX%A 28 | zQ*StV4WzVHG#+*IwPf7@BwNAUN{Tre0&7KHK8Gq6W2Jeqe%S(>+{1HM$cFn>J%6m& 29 | z|8$kpBb=Dxh6$MwDCzl1g;Y`~f7F-wEny-#iI1X?l(D;Z$PLR3jay0@l4S5PR8fS< 30 | zi+>Hek8mDV9ItDt3}cF;v-yN79ZUwT1^LyF1n$Cy%gt_C&r9r*A*)&$ZFlHnVf=mg#Q%sK{dyD8$2z{ndd#G^c?_~0mb!M$7ZxU$373E=vV}GR5OrWDm 34 | zBsRhxx$#Ql_U#b2h}uPg{qm53MhlGO!F^=m@8@Ie88^`%?=q2+>^o91FPtaKgX{mUR_AEr9NwT^_^@8>UHx 
37 | z6Vql9ovz7$NpnPl5QlqsKP0|}EskXfTz-@ZpBlsI-~?B-4%w_GY9s9?ln=!A8wNMf 38 | z=;eAIwp6&0nZ9Qq({C*2AXxKEGCjGa)2K^@ryP2!6dWiRy0M0vL*145f@BzR 40 | z`IXs~3pGOq&;GIFDC=$JbDmK$C`A~nS$@*@hY$2e@kN0E;7jK@*TzW0 41 | z2FmwjqM0G-dfMDB3x5@pycb${TTJ7dzu+teFC9mUiSr(jrvQ~;sj`>uoen9{iXP5; 42 | zbF9`-K065Zx&Tv2gK4wf8(U++M31oGlTiV$(^)qux97HP~_mSNPtqZA#7xOo;Q 43 | zc-B?N(jYE1ix_L{M|cpCS4s5I3Tx_Hh1#!O-iBP5iS>iM{C~ZUez^pglqH5IAdOAD 44 | zE;AjwjL)_IsQU6y4t4Uc9KXl~!BYeA;B8*wK)|cg^tiD0aN2iWzR>YnUUS9?3jKhiAKpgQ0 48 | zCRNTKwkeO~{pU2|S8)UYp21g#5$lGV|mN 50 | zdQNSkH%R6}>C_cemrZCjR|-)D<8Y^P74a}>FKs&95{8d9SiTvhB4{eTUGFbTa-IbX 51 | z8Lw1xoPPp_ZdIbl#dCzP1`N@V?eb7SizWtmm5ujG(rKZHZw$>Ozzf4E=Yf*+eH}kn 52 | zL{&n><${iESA&t~5pz;&vs%HlLYxXMjPlLH{?*-#T4#$(P;pke+8K*f)n*Ph>SB9|8)|f;w=%OP6`nms 55 | zdFVyjJf#&^=o+P_Y1Q{$$Aql8VqX+>=4)lx*&wB#{$tlb&SwC3E~G-pKOFXRQrRSk 56 | zMt}Fa|4IQc;^N}stu@1BVjk>`8grvj#WVz9k#>7N?#JQZdag9deih!2aWVB}1klbo 57 | zXKhuKy*h7m?``T}aM*}PexaGXqeZCm>ME7BQi64D>9lL0AoF2`Tzo^hNSMC9B=jT@ 58 | zjA%viojl=gm6?1mPDg-)fy98Cb}Bdio_{dD+i;%nIJ6ZfQ^LPvl@~R#wk@K-##0ma 59 | zj|D(^Fr8U(o(FRy$Z0FGyp@Yam&`H8oVv>6n8UKEVec>#*NVp)YrdOPgay4SxQ!PjT#gf^PISz?!W;(=9W 63 | zC3sT|H>d}ErTr;c8LY_P{IPZ;Wq&_a4mM`dLu 66 | zn^@7N(YC~q-qV~usbu?>{mD#~LNHhWdnNcaB&J>J@M)=+i^{*SwmS-XE-ywoYoZ3U 67 | zdb4|M(!Ok$LY!K2m*Wjy>w_)zTm#k_g&J$VCb%~Wdvc^;lfge`vp1dg{O^Aqhg$$l5i=+C+Ser`KQQfEhZr}z4>;>#SGZZ*qGHd~H-az)BPnlSx{cgt);r6Mgt;@-+9-E2t@FXz( 73 | zkh$)!g3>JMRK2Fj;(t$e?z7fJ;>m@#_j;z{x_|Hzf&NnB2s)A$gs4~;$GCPhUTy3P 74 | z%V#3D9@E1QNOb5UL}Ny_3_nU3V+}u&a}K`lA2AVv-@33!d)C9C2b%Dl#4Wp`G3Y#VXS2vODOK 78 | zH6Ik?Qx|Shj(@mOGi-V8X@6z3u=_{}wqW)r%+?8?joDa^!2`v;{CIT;J4sydrLxNW 79 | zgUBvqK&gHOJyAP#hP2e3p;XKF@8~4;2})IVed0tk&>;1X$v5<9B1$g>xX)iE=u55Vhuq!Be_Zh7H_L0ojmJ-L7F 82 | zFE;pV4HH_WmO_~*TJ#EHu@A`M)iV-3k0!7^nIQ%08mJA-{<~wkIj0>_AP%Vz_4#<* 83 | vVkRz)@E~iIs*PG)1#O3 88 | z^y`3>TH+sKUrDep!F5A*^DLBShsp6!+cwX9)&)8gv+|D%3&Z;I1%KSpdiN0>xk9!c 89 | zDy`)OH&-h44JMjA=aom~2^DT%XWWk@BGsF>T2qX9tzqChzyU07Q5O!7pHe8DLN|bp 90 | 
zdlTM)yHOJqRKS{Ok~8#6aa|1~wdC!f6HE-YeYg|0WDJ@d5_mH_Xk7QkFCJ);Ovbo( 91 | zM*o?Jc;mjWG9!T2#lADbM8kdZGB*L;W@^4gw!CX?raCf7DLTZhN{Ky@jK1r2x?YmoKqE|=?ny{_>M 93 | zPW`hAEW;McTgizc-%KZOZ(LC{iJ26YFd!W@TJ{Cm#XaWxI)6edCQgN9H-J&#etfq2BL8u2mpTqV;-qg8O*hlGo~w*^QU&D8~wh* 95 | zjRoXGhq}{c%8{%P7XC3-Gtv_4vPhC&G({}4=soxR<|OO8=U32O6B6X4cu}vawOLA8 96 | zUCsnRtC7@}bbt5OjoNEaRt9SW579M@O#ke{I?9P!pHGx8+mS!>XB(pRy3MCefz~7F 97 | zw>%NX4brs~1S)zzfOF(+Ek>{e>P?F^u#}8;o=P;Q#CD^EyGpfE6zisF&7Wp0@PM#uQ8l(zm 100 | zP$=UTG!hf$iSFO~fxdOLf78q%Z-e$nFbWUzJ1GphTCI4>g(9IgBR5F8ark@QBWdEc 101 | z#zwo4Yk#W3G2@1tNzg@lLP}&U1pA#O7MZO99tC&}9^@dg>AOG&wsG)`L6)m8D^F-bi%B9Fk!}EJ;7-NTD*1)VJblm%?L@A$yiNR_ 103 | zN*tPjf$n}UNI)oYV<>TQzBo6=Js6=KI%9xd0e}BZ7R)GU!se_DK2rXrAU6Li*}JNk 104 | z#OhSK^H=?bSi&TwvW+tB(6HGW@aslSEso+M=;CSx(a|*2cpNSek=YCt0xna?)V|}{ 105 | zTrlxT(K!Y~T&oi%3ZE#@!{L<{lsr)-9)*xnPKtF8N_ai2o#MZ0IBC!gL$Qsp6Z 106 | z8Gn6s&gnB+R1tMMtKG)p(&tf2mQR=p-i$vp0Miv<=k6$|Zzncr&mA(7R@^rLWDs=I 107 | zYQH*agSALP4X>gKE|9hMvV&2kw^AvZ!faW}gmNmbaQYn9l7n@!u{?2sM8nv;Tmv{O 108 | zBGR5{oW=M)QBwCHaH^{#iZy8#xtUkH+J6it`5)T}v?(oj_fcQINt+8O%OF`^vYP4)mg#jyW+dip{0)NO!-BTlR_?Vq;{U272sSWrCqIVm8cQL 110 | zrQmZDOJ2`L8Ma$4xNjy5tYTIfMP$0Cem%%TLvRcw`DGcD@_|S%DDQ4u3UK 111 | z8OEd2toTA8!T^Fm!2&_{Jf5ZMEi=opVlV!Ik 113 | z<##3|qPvm^@~emp!AcWqZnh=Kjei*4uG>XNb8+}}YQ^1W{{z9%$|}v5_lcs>dBQ)!1^xT1u+L5lB_@cL=1D~x9Wuz 116 | zxNt@}3^A_ZoI>X}!Mx*>E&MhCXYogWFhGnpZyeU!If!?eQb$yAbIIg><^*Non_1wm3-B0LADTnL 120 | z@Hcy)r!Y%8x7clRE|bsgq9dRISAT07rTRBH&nUUbh`5p}Mb34r|9T->C}v-0X5Ec}{F9eQ~ 124 | zhpvHGyil+IHRzd!=^rDs{FlTe*YP3?X>tO`A|pa&VSnR#Teu~TOBJYPw~MpSAuLU& 125 | zyl44lEnDGx9xOCB98{3a*(->ajsd_h%GmzCJno?Q&Pk&<XzvfiOofjV7KjkT8RO)tQL0)FS;%Xt5w2$x+^;B_k^yfQRtHC!ndJ& 127 | z&$LQY(4EC(dem!kO3UB}z~JrL$`X9Qg4CLqd7*@Hn2yxVThL!~BW 128 | za6(zTA3VB62Y($=87Nd(yuJH&{z_$9;Eo)VO3Rv;-+gC34F02W_29+|Br6D$(fQ&u 129 | z--#+MO#x4xiRrjERDA?xBfD@AaPlspDCD~m)QT=JS6a}X`|BZPaBeJ2pzNE=Uv-7K 130 | zn1=v3M4?6-!r&5zU^ey=zQ|mJwNTM3ynPRaeEP2%Ab&NAz23L!+3J78>DNz@ 131 | 
zcRGVQ{T78^a%8e;7hmY!<1!>F%P3kh+(XpJvYZi2+_KJ{TpjXm2?SpLtC$kZ^u(dN 132 | zk6GUPl;ciEb+#wFOVmH>IJ*$ACbE1kae%lKRxO7TlRgGD&%hdcWBJ_hl&X@eH`oP+ 133 | zEE(=8aL=VamW$>w$<-v#=-Gn(S+ 134 | z3;|DiLStTzl)8WRlm|&b);zFN)QobrszD9rwkLNNY*MK+{OAn&2FY03tEpf5Jpj54Gk>^{cy&yUeV3R*pRs^>fqgveU|_5X 136 | zX+e%27qs2?7c(6BMHwnKprk+Z!)v4I=V4BAvdR#Lu7_d4gJ5#jsXEF%=!5*qv7*_k 137 | zM%5WM$*Vou!Y7D@*jCa6 139 | zYHwni68{B(ythcY+j4tKR&BK~wpq+p_I^Ee5*>UjeSbn=Z@M{3)IDB1h`wTBv8Vf) 140 | zGUabKI^vx;3Bt3)O=|zXZF^CRCWGFRoqrAlxT0y&$j5q6SnOOU5o0?&Ps&D6Ph}`# 141 | zwY!d(eP-j3KN_5`jU!b3rM~A)_kI~RXoFy;RSw!9pX9GG#B*F)`BST~98N+GqC9J7 142 | zyK!;S%6g@oNzmD1wzp`{WZzW@_X`T-&+vR;$j+YidW6`>)~^xl)>6cuUrT~*Lu`sa 144 | ztO}<9;#H(Q==8o5Tl}2UX=uGs%2mo19NqD{5U5NNDyo1Rz+mR`!bH$50%+yM7He^f 145 | zdBH00TyclwDiI=}Q>9_P*rcl2Xnz%4ZX#_#&l$m~RIyIW^MA=hnUPFr%4bUq`Q(Rz 146 | zpGTKslcC%{fbRe!3oa<}4EKVhQ%zS`J2GeEVr+blZ0VAxrNnYnS|C25Uov}{GHoRv 147 | zfFHfa&Jwlv(G^#}8A+}RLJN4n=M4UA|@AT(`^>Fu_AKhK^yT$XPA9`v4eB_FD$ie0HD3Mi4;`s$hx2m9!(i8H!RzGg?C)?9Pb`H(UMj 149 | z<8h|0U)ScABl{{?sOhBJJ+yFPUOt7$z&nj*C(+UXB1_UZbG@s_K&)51_vPKxQ4~mI 150 | zVsBl7$^wYH(tS=4B*eHGsefF8JLWa`(eLnE!hn1l1gzVr56RAj0G_?aY8PsdnewAx 151 | zZT0?fpiS{s0Aw+t2$UUQ8M-rW={ko_;sJpjJ1cye3c$(VR4{`2w#4pbFy+`(TlH;+ 152 | zsRJ^EQ{%@!-i8GRBqYoy6jIyiS8YC#b<%d)^wC2C>^bvFCR##kmw%eyk5ClDOBLPk 153 | z?ZY@ePP8_qq>U*{?_kesZmky4-_cLmu?|B2);KDCQ?spBIJE?6j8`9&{>UoVIRUPw 154 | zrx_TuibGY$dcSSH{d>KOqk_s$i=X_v>|dz1MMzQ0CJX+$*Y(PaiAhNRl8^?%L6tN! 
155 | zoCVZZRG(i1T9lcmLVv7ZE)U68M1}eqD2QeQo3F+z`_kj^3BAQ~kifx^iJVoht|)uQ 156 | zp@Zi4b(sUN|HikW5)IF|`VA(5R}Oig)4g3o@Edr6G~IUUj9NaSza}p$jk_pOfzO2r 157 | z`Uq<(Z;%5A7DwJe@~J%KO>wC&&NuCjN8iAHC?0eTu|CfS+kf-2P)BX2mNxYJOt)<# 158 | zud7R9Is%@q9H$%no|irWi2&HJ00Qekek!<&=4kke7cKnH-x$$Mg_!j+R4)~S@4-7 160 | zr5;gyzbMDhd4E#B$j(G@-z05T!$YZ=j)t^KKgHs!H_s_|8VC%~m+HAY-YK*Nx44gH 161 | zaUG54Wez*M!YaxNS#)BjN)7?2M!A3NOY99Oa}Rb;C#RPNionqlK3@N0PTt)Op#&Rj>ipw!qDZki{+-V5CQD(Yj^-6 166 | 167 | -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_delta_modify_large.src: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluekeyes/go-gitdiff/HEAD/gitdiff/testdata/apply/bin_fragment_delta_modify_large.src -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_literal_create.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bluekeyes/go-gitdiff/HEAD/gitdiff/testdata/apply/bin_fragment_literal_create.out -------------------------------------------------------------------------------- /gitdiff/testdata/apply/bin_fragment_literal_create.patch: -------------------------------------------------------------------------------- 1 | diff --git a/gitdiff/testdata/apply/bin_fragment_literal_create.src b/gitdiff/testdata/apply/bin_fragment_literal_create.src 2 | GIT binary patch 3 | literal 32 4 | ocmZQzU`lR_IpTEvL*`JUiBtu5L*`JUiBtu5J`s>wb0HU+h#6w8q?tUO~cHmDjZi2<8yZ9XmKhhMdo 5 | zWu(4bg|8QwzZ|1e*rL4P#)`FenXTQ5=J2y;^BfB}4 11 | zWkisH791|vOVl5e-@^VLX0s~Ky_UyN!3;CgPr>Edj0j+0gOSwSsFsr$0q6zUJph 3 | Date: Tue Apr 2 22:55:40 2019 -0700 4 | 5 | A binary file with the first 10 fibonacci numbers. 
6 | 7 | diff --git a/dir/ten.bin b/dir/ten.bin 8 | new file mode 100644 9 | index 0000000000000000000000000000000000000000..77b068ba48c356156944ea714740d0d5ca07bfec 10 | GIT binary patch 11 | literal 40 12 | gcmZQzU|?i`U?w2V48*KJ%mKu_Kr9NxN 3 | Date: Tue Apr 2 22:55:40 2019 -0700 4 | 5 | A file with multiple fragments. 6 | 7 | The content is arbitrary. 8 | 9 | -------------------------------------------------------------------------------- /gitdiff/testdata/one_file.patch: -------------------------------------------------------------------------------- 1 | commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe 2 | Author: Morton Haypenny 3 | Date: Tue Apr 2 22:55:40 2019 -0700 4 | 5 | A file with multiple fragments. 6 | 7 | The content is arbitrary. 8 | 9 | diff --git a/dir/file1.txt b/dir/file1.txt 10 | index ebe9fa54..fe103e1d 100644 11 | --- a/dir/file1.txt 12 | +++ b/dir/file1.txt 13 | @@ -3,6 +3,8 @@ fragment 1 14 | context line 15 | -old line 1 16 | -old line 2 17 | context line 18 | +new line 1 19 | +new line 2 20 | +new line 3 21 | context line 22 | -old line 3 23 | +new line 4 24 | +new line 5 25 | @@ -31,2 +33,2 @@ fragment 2 26 | context line 27 | -old line 4 28 | +new line 6 29 | -------------------------------------------------------------------------------- /gitdiff/testdata/string/binary_modify.patch: -------------------------------------------------------------------------------- 1 | diff --git a/file.bin b/file.bin 2 | index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644 3 | GIT binary patch 4 | delta 66 5 | QcmeZhVVvM$!$1K50C&Ox;s5{u 6 | 7 | delta 5 8 | McmZo+^qAlQ00i9urT_o{ 9 | 10 | -------------------------------------------------------------------------------- /gitdiff/testdata/string/binary_modify_nodata.patch: -------------------------------------------------------------------------------- 1 | diff --git a/file.bin b/file.bin 2 | index a7f4d5d..bdc9a70 100644 3 | Binary files a/file.bin and b/file.bin 
differ 4 | -------------------------------------------------------------------------------- /gitdiff/testdata/string/binary_new.patch: -------------------------------------------------------------------------------- 1 | diff --git a/file.bin b/file.bin 2 | new file mode 100644 3 | index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c 4 | GIT binary patch 5 | literal 72 6 | zcmV-O0Jr~td-`u6JcK&{KDK= 3 | Date: Tue Apr 2 22:55:40 2019 -0700 4 | 5 | A file with multiple fragments. 6 | 7 | The content is arbitrary. 8 | 9 | diff --git a/dir/file1.txt b/dir/file1.txt 10 | index ebe9fa54..fe103e1d 100644 11 | --- a/dir/file1.txt 12 | +++ b/dir/file1.txt 13 | @@ -3,6 +3,8 @@ fragment 1 14 | context line 15 | -old line 1 16 | -old line 2 17 | context line 18 | +new line 1 19 | +new line 2 20 | +new line 3 21 | context line 22 | -old line 3 23 | +new line 4 24 | +new line 5 25 | @@ -31,2 +33,2 @@ fragment 2 26 | context line 27 | -old line 4 28 | +new line 6 29 | diff --git a/dir/file2.txt b/dir/file2.txt 30 | index 417ebc70..67514b7f 100644 31 | --- a/dir/file2.txt 32 | +++ b/dir/file2.txt 33 | @@ -3,6 +3,8 @@ fragment 1 34 | context line 35 | -old line 1 36 | -old line 2 37 | context line 38 | +new line 1 39 | +new line 2 40 | +new line 3 41 | context line 42 | -old line 3 43 | +new line 4 44 | +new line 5 45 | @@ -31,2 +33,2 @@ fragment 2 46 | context line 47 | -old line 4 48 | +new line 6 49 | -------------------------------------------------------------------------------- /gitdiff/text.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "strconv" 7 | "strings" 8 | ) 9 | 10 | // ParseTextFragments parses text fragments until the next file header or the 11 | // end of the stream and attaches them to the given file. It returns the number 12 | // of fragments that were added. 
13 | func (p *parser) ParseTextFragments(f *File) (n int, err error) { 14 | for { 15 | frag, err := p.ParseTextFragmentHeader() 16 | if err != nil { 17 | return n, err 18 | } 19 | if frag == nil { 20 | return n, nil 21 | } 22 | 23 | if f.IsNew && frag.OldLines > 0 { 24 | return n, p.Errorf(-1, "new file depends on old contents") 25 | } 26 | if f.IsDelete && frag.NewLines > 0 { 27 | return n, p.Errorf(-1, "deleted file still has contents") 28 | } 29 | 30 | if err := p.ParseTextChunk(frag); err != nil { 31 | return n, err 32 | } 33 | 34 | f.TextFragments = append(f.TextFragments, frag) 35 | n++ 36 | } 37 | } 38 | 39 | func (p *parser) ParseTextFragmentHeader() (*TextFragment, error) { 40 | const ( 41 | startMark = "@@ -" 42 | endMark = " @@" 43 | ) 44 | 45 | if !strings.HasPrefix(p.Line(0), startMark) { 46 | return nil, nil 47 | } 48 | 49 | parts := strings.SplitAfterN(p.Line(0), endMark, 2) 50 | if len(parts) < 2 { 51 | return nil, p.Errorf(0, "invalid fragment header") 52 | } 53 | 54 | f := &TextFragment{} 55 | f.Comment = strings.TrimSpace(parts[1]) 56 | 57 | header := parts[0][len(startMark) : len(parts[0])-len(endMark)] 58 | ranges := strings.Split(header, " +") 59 | if len(ranges) != 2 { 60 | return nil, p.Errorf(0, "invalid fragment header") 61 | } 62 | 63 | var err error 64 | if f.OldPosition, f.OldLines, err = parseRange(ranges[0]); err != nil { 65 | return nil, p.Errorf(0, "invalid fragment header: %v", err) 66 | } 67 | if f.NewPosition, f.NewLines, err = parseRange(ranges[1]); err != nil { 68 | return nil, p.Errorf(0, "invalid fragment header: %v", err) 69 | } 70 | 71 | if err := p.Next(); err != nil && err != io.EOF { 72 | return nil, err 73 | } 74 | return f, nil 75 | } 76 | 77 | func (p *parser) ParseTextChunk(frag *TextFragment) error { 78 | if p.Line(0) == "" { 79 | return p.Errorf(0, "no content following fragment header") 80 | } 81 | 82 | oldLines, newLines := frag.OldLines, frag.NewLines 83 | for oldLines > 0 || newLines > 0 { 84 | line := 
p.Line(0) 85 | op, data := line[0], line[1:] 86 | 87 | switch op { 88 | case '\n': 89 | data = "\n" 90 | fallthrough // newer GNU diff versions create empty context lines 91 | case ' ': 92 | oldLines-- 93 | newLines-- 94 | if frag.LinesAdded == 0 && frag.LinesDeleted == 0 { 95 | frag.LeadingContext++ 96 | } else { 97 | frag.TrailingContext++ 98 | } 99 | frag.Lines = append(frag.Lines, Line{OpContext, data}) 100 | case '-': 101 | oldLines-- 102 | frag.LinesDeleted++ 103 | frag.TrailingContext = 0 104 | frag.Lines = append(frag.Lines, Line{OpDelete, data}) 105 | case '+': 106 | newLines-- 107 | frag.LinesAdded++ 108 | frag.TrailingContext = 0 109 | frag.Lines = append(frag.Lines, Line{OpAdd, data}) 110 | case '\\': 111 | // this may appear in middle of fragment if it's for a deleted line 112 | if isNoNewlineMarker(line) { 113 | removeLastNewline(frag) 114 | break 115 | } 116 | fallthrough 117 | default: 118 | // TODO(bkeyes): if this is because we hit the next header, it 119 | // would be helpful to return the miscounts line error. 
We could 120 | // either test for the common headers ("@@ -", "diff --git") or 121 | // assume any invalid op ends the fragment; git returns the same 122 | // generic error in all cases so either is compatible 123 | return p.Errorf(0, "invalid line operation: %q", op) 124 | } 125 | 126 | if err := p.Next(); err != nil { 127 | if err == io.EOF { 128 | break 129 | } 130 | return err 131 | } 132 | } 133 | 134 | if oldLines != 0 || newLines != 0 { 135 | hdr := max(frag.OldLines-oldLines, frag.NewLines-newLines) + 1 136 | return p.Errorf(-hdr, "fragment header miscounts lines: %+d old, %+d new", -oldLines, -newLines) 137 | } 138 | if frag.LinesAdded == 0 && frag.LinesDeleted == 0 { 139 | return p.Errorf(0, "fragment contains no changes") 140 | } 141 | 142 | // check for a final "no newline" marker since it is not included in the 143 | // counters used to stop the loop above 144 | if isNoNewlineMarker(p.Line(0)) { 145 | removeLastNewline(frag) 146 | if err := p.Next(); err != nil && err != io.EOF { 147 | return err 148 | } 149 | } 150 | 151 | return nil 152 | } 153 | 154 | func isNoNewlineMarker(s string) bool { 155 | // test for "\ No newline at end of file" by prefix because the text 156 | // changes by locale (git claims all versions are at least 12 chars) 157 | return len(s) >= 12 && s[:2] == "\\ " 158 | } 159 | 160 | func removeLastNewline(frag *TextFragment) { 161 | if len(frag.Lines) > 0 { 162 | last := &frag.Lines[len(frag.Lines)-1] 163 | last.Line = strings.TrimSuffix(last.Line, "\n") 164 | } 165 | } 166 | 167 | func parseRange(s string) (start int64, end int64, err error) { 168 | parts := strings.SplitN(s, ",", 2) 169 | 170 | if start, err = strconv.ParseInt(parts[0], 10, 64); err != nil { 171 | nerr := err.(*strconv.NumError) 172 | return 0, 0, fmt.Errorf("bad start of range: %s: %v", parts[0], nerr.Err) 173 | } 174 | 175 | if len(parts) > 1 { 176 | if end, err = strconv.ParseInt(parts[1], 10, 64); err != nil { 177 | nerr := err.(*strconv.NumError) 178 | 
return 0, 0, fmt.Errorf("bad end of range: %s: %v", parts[1], nerr.Err) 179 | } 180 | } else { 181 | end = 1 182 | } 183 | 184 | return 185 | } 186 | 187 | func max(a, b int64) int64 { 188 | if a > b { 189 | return a 190 | } 191 | return b 192 | } 193 | -------------------------------------------------------------------------------- /gitdiff/text_test.go: -------------------------------------------------------------------------------- 1 | package gitdiff 2 | 3 | import ( 4 | "io" 5 | "reflect" 6 | "testing" 7 | ) 8 | 9 | func TestParseTextFragmentHeader(t *testing.T) { 10 | tests := map[string]struct { 11 | Input string 12 | Output *TextFragment 13 | Err bool 14 | }{ 15 | "shortest": { 16 | Input: "@@ -1 +1 @@\n", 17 | Output: &TextFragment{ 18 | OldPosition: 1, 19 | OldLines: 1, 20 | NewPosition: 1, 21 | NewLines: 1, 22 | }, 23 | }, 24 | "standard": { 25 | Input: "@@ -21,5 +28,9 @@\n", 26 | Output: &TextFragment{ 27 | OldPosition: 21, 28 | OldLines: 5, 29 | NewPosition: 28, 30 | NewLines: 9, 31 | }, 32 | }, 33 | "trailingComment": { 34 | Input: "@@ -21,5 +28,9 @@ func test(n int) {\n", 35 | Output: &TextFragment{ 36 | Comment: "func test(n int) {", 37 | OldPosition: 21, 38 | OldLines: 5, 39 | NewPosition: 28, 40 | NewLines: 9, 41 | }, 42 | }, 43 | "incomplete": { 44 | Input: "@@ -12,3 +2\n", 45 | Err: true, 46 | }, 47 | "badNumbers": { 48 | Input: "@@ -1a,2b +3c,4d @@\n", 49 | Err: true, 50 | }, 51 | } 52 | 53 | for name, test := range tests { 54 | t.Run(name, func(t *testing.T) { 55 | p := newTestParser(test.Input, true) 56 | 57 | frag, err := p.ParseTextFragmentHeader() 58 | if test.Err { 59 | if err == nil || err == io.EOF { 60 | t.Fatalf("expected error parsing header, but got %v", err) 61 | } 62 | return 63 | } 64 | if err != nil { 65 | t.Fatalf("error parsing header: %v", err) 66 | } 67 | 68 | if !reflect.DeepEqual(test.Output, frag) { 69 | t.Errorf("incorrect fragment\nexpected: %+v\nactual: %+v", test.Output, frag) 70 | } 71 | }) 72 | } 73 | } 74 | 75 | 
func TestParseTextChunk(t *testing.T) { 76 | tests := map[string]struct { 77 | Input string 78 | Fragment TextFragment 79 | 80 | Output *TextFragment 81 | Err bool 82 | }{ 83 | "addWithContext": { 84 | Input: ` context line 85 | +new line 1 86 | +new line 2 87 | context line 88 | `, 89 | Fragment: TextFragment{ 90 | OldLines: 2, 91 | NewLines: 4, 92 | }, 93 | Output: &TextFragment{ 94 | OldLines: 2, 95 | NewLines: 4, 96 | Lines: []Line{ 97 | {OpContext, "context line\n"}, 98 | {OpAdd, "new line 1\n"}, 99 | {OpAdd, "new line 2\n"}, 100 | {OpContext, "context line\n"}, 101 | }, 102 | LinesAdded: 2, 103 | LeadingContext: 1, 104 | TrailingContext: 1, 105 | }, 106 | }, 107 | "deleteWithContext": { 108 | Input: ` context line 109 | -old line 1 110 | -old line 2 111 | context line 112 | `, 113 | Fragment: TextFragment{ 114 | OldLines: 4, 115 | NewLines: 2, 116 | }, 117 | Output: &TextFragment{ 118 | OldLines: 4, 119 | NewLines: 2, 120 | Lines: []Line{ 121 | {OpContext, "context line\n"}, 122 | {OpDelete, "old line 1\n"}, 123 | {OpDelete, "old line 2\n"}, 124 | {OpContext, "context line\n"}, 125 | }, 126 | LinesDeleted: 2, 127 | LeadingContext: 1, 128 | TrailingContext: 1, 129 | }, 130 | }, 131 | "replaceWithContext": { 132 | Input: ` context line 133 | -old line 1 134 | +new line 1 135 | context line 136 | `, 137 | Fragment: TextFragment{ 138 | OldLines: 3, 139 | NewLines: 3, 140 | }, 141 | Output: &TextFragment{ 142 | OldLines: 3, 143 | NewLines: 3, 144 | Lines: []Line{ 145 | {OpContext, "context line\n"}, 146 | {OpDelete, "old line 1\n"}, 147 | {OpAdd, "new line 1\n"}, 148 | {OpContext, "context line\n"}, 149 | }, 150 | LinesDeleted: 1, 151 | LinesAdded: 1, 152 | LeadingContext: 1, 153 | TrailingContext: 1, 154 | }, 155 | }, 156 | "middleContext": { 157 | Input: ` context line 158 | -old line 1 159 | context line 160 | +new line 1 161 | context line 162 | `, 163 | Fragment: TextFragment{ 164 | OldLines: 4, 165 | NewLines: 4, 166 | }, 167 | Output: &TextFragment{ 168 | 
OldLines: 4, 169 | NewLines: 4, 170 | Lines: []Line{ 171 | {OpContext, "context line\n"}, 172 | {OpDelete, "old line 1\n"}, 173 | {OpContext, "context line\n"}, 174 | {OpAdd, "new line 1\n"}, 175 | {OpContext, "context line\n"}, 176 | }, 177 | LinesDeleted: 1, 178 | LinesAdded: 1, 179 | LeadingContext: 1, 180 | TrailingContext: 1, 181 | }, 182 | }, 183 | "deleteFinalNewline": { 184 | Input: ` context line 185 | -old line 1 186 | +new line 1 187 | \ No newline at end of file 188 | `, 189 | Fragment: TextFragment{ 190 | OldLines: 2, 191 | NewLines: 2, 192 | }, 193 | Output: &TextFragment{ 194 | OldLines: 2, 195 | NewLines: 2, 196 | Lines: []Line{ 197 | {OpContext, "context line\n"}, 198 | {OpDelete, "old line 1\n"}, 199 | {OpAdd, "new line 1"}, 200 | }, 201 | LinesDeleted: 1, 202 | LinesAdded: 1, 203 | LeadingContext: 1, 204 | }, 205 | }, 206 | "addFinalNewline": { 207 | Input: ` context line 208 | -old line 1 209 | \ No newline at end of file 210 | +new line 1 211 | `, 212 | Fragment: TextFragment{ 213 | OldLines: 2, 214 | NewLines: 2, 215 | }, 216 | Output: &TextFragment{ 217 | OldLines: 2, 218 | NewLines: 2, 219 | Lines: []Line{ 220 | {OpContext, "context line\n"}, 221 | {OpDelete, "old line 1"}, 222 | {OpAdd, "new line 1\n"}, 223 | }, 224 | LinesDeleted: 1, 225 | LinesAdded: 1, 226 | LeadingContext: 1, 227 | }, 228 | }, 229 | "addAll": { 230 | Input: `+new line 1 231 | +new line 2 232 | +new line 3 233 | `, 234 | Fragment: TextFragment{ 235 | OldLines: 0, 236 | NewLines: 3, 237 | }, 238 | Output: &TextFragment{ 239 | OldLines: 0, 240 | NewLines: 3, 241 | Lines: []Line{ 242 | {OpAdd, "new line 1\n"}, 243 | {OpAdd, "new line 2\n"}, 244 | {OpAdd, "new line 3\n"}, 245 | }, 246 | LinesAdded: 3, 247 | }, 248 | }, 249 | "deleteAll": { 250 | Input: `-old line 1 251 | -old line 2 252 | -old line 3 253 | `, 254 | Fragment: TextFragment{ 255 | OldLines: 3, 256 | NewLines: 0, 257 | }, 258 | Output: &TextFragment{ 259 | OldLines: 3, 260 | NewLines: 0, 261 | Lines: []Line{ 262 
| {OpDelete, "old line 1\n"}, 263 | {OpDelete, "old line 2\n"}, 264 | {OpDelete, "old line 3\n"}, 265 | }, 266 | LinesDeleted: 3, 267 | }, 268 | }, 269 | "emptyContextLine": { 270 | Input: ` context line 271 | 272 | +new line 273 | context line 274 | `, 275 | Fragment: TextFragment{ 276 | OldLines: 3, 277 | NewLines: 4, 278 | }, 279 | Output: &TextFragment{ 280 | OldLines: 3, 281 | NewLines: 4, 282 | Lines: []Line{ 283 | {OpContext, "context line\n"}, 284 | {OpContext, "\n"}, 285 | {OpAdd, "new line\n"}, 286 | {OpContext, "context line\n"}, 287 | }, 288 | LinesAdded: 1, 289 | LeadingContext: 2, 290 | TrailingContext: 1, 291 | }, 292 | }, 293 | "emptyChunk": { 294 | Input: "", 295 | Err: true, 296 | }, 297 | "invalidOperation": { 298 | Input: ` context line 299 | ?wat line 300 | context line 301 | `, 302 | Fragment: TextFragment{ 303 | OldLines: 3, 304 | NewLines: 3, 305 | }, 306 | Err: true, 307 | }, 308 | "unbalancedHeader": { 309 | Input: ` context line 310 | -old line 1 311 | +new line 1 312 | context line 313 | `, 314 | Fragment: TextFragment{ 315 | OldLines: 2, 316 | NewLines: 5, 317 | }, 318 | Err: true, 319 | }, 320 | "onlyContext": { 321 | Input: ` context line 322 | context line 323 | `, 324 | Fragment: TextFragment{ 325 | OldLines: 2, 326 | NewLines: 2, 327 | }, 328 | Err: true, 329 | }, 330 | "unexpectedNoNewlineMarker": { 331 | Input: `\ No newline at end of file`, 332 | Fragment: TextFragment{ 333 | OldLines: 1, 334 | NewLines: 1, 335 | }, 336 | Err: true, 337 | }, 338 | } 339 | 340 | for name, test := range tests { 341 | t.Run(name, func(t *testing.T) { 342 | p := newTestParser(test.Input, true) 343 | 344 | frag := test.Fragment 345 | err := p.ParseTextChunk(&frag) 346 | if test.Err { 347 | if err == nil || err == io.EOF { 348 | t.Fatalf("expected error parsing text chunk, but got %v", err) 349 | } 350 | return 351 | } 352 | if err != nil { 353 | t.Fatalf("error parsing text chunk: %v", err) 354 | } 355 | 356 | if !reflect.DeepEqual(test.Output, 
&frag) { 357 | t.Errorf("incorrect fragment\nexpected: %+v\nactual: %+v", test.Output, &frag) 358 | } 359 | }) 360 | } 361 | } 362 | 363 | func TestParseTextFragments(t *testing.T) { 364 | tests := map[string]struct { 365 | Input string 366 | File File 367 | 368 | Fragments []*TextFragment 369 | Err bool 370 | }{ 371 | "multipleChanges": { 372 | Input: `@@ -1,3 +1,2 @@ 373 | context line 374 | -old line 1 375 | context line 376 | @@ -8,3 +7,3 @@ 377 | context line 378 | -old line 2 379 | +new line 1 380 | context line 381 | @@ -15,3 +14,4 @@ 382 | context line 383 | -old line 3 384 | +new line 2 385 | +new line 3 386 | context line 387 | `, 388 | Fragments: []*TextFragment{ 389 | { 390 | OldPosition: 1, 391 | OldLines: 3, 392 | NewPosition: 1, 393 | NewLines: 2, 394 | Lines: []Line{ 395 | {OpContext, "context line\n"}, 396 | {OpDelete, "old line 1\n"}, 397 | {OpContext, "context line\n"}, 398 | }, 399 | LinesDeleted: 1, 400 | LeadingContext: 1, 401 | TrailingContext: 1, 402 | }, 403 | { 404 | OldPosition: 8, 405 | OldLines: 3, 406 | NewPosition: 7, 407 | NewLines: 3, 408 | Lines: []Line{ 409 | {OpContext, "context line\n"}, 410 | {OpDelete, "old line 2\n"}, 411 | {OpAdd, "new line 1\n"}, 412 | {OpContext, "context line\n"}, 413 | }, 414 | LinesDeleted: 1, 415 | LinesAdded: 1, 416 | LeadingContext: 1, 417 | TrailingContext: 1, 418 | }, 419 | { 420 | OldPosition: 15, 421 | OldLines: 3, 422 | NewPosition: 14, 423 | NewLines: 4, 424 | Lines: []Line{ 425 | {OpContext, "context line\n"}, 426 | {OpDelete, "old line 3\n"}, 427 | {OpAdd, "new line 2\n"}, 428 | {OpAdd, "new line 3\n"}, 429 | {OpContext, "context line\n"}, 430 | }, 431 | LinesDeleted: 1, 432 | LinesAdded: 2, 433 | LeadingContext: 1, 434 | TrailingContext: 1, 435 | }, 436 | }, 437 | }, 438 | "badNewFile": { 439 | Input: `@@ -1 +1,2 @@ 440 | -old line 1 441 | +new line 1 442 | +new line 2 443 | `, 444 | File: File{ 445 | IsNew: true, 446 | }, 447 | Err: true, 448 | }, 449 | "badDeletedFile": { 450 | Input: `@@ 
-1,2 +1 @@ 451 | -old line 1 452 | context line 453 | `, 454 | File: File{ 455 | IsDelete: true, 456 | }, 457 | Err: true, 458 | }, 459 | } 460 | 461 | for name, test := range tests { 462 | t.Run(name, func(t *testing.T) { 463 | p := newTestParser(test.Input, true) 464 | 465 | file := test.File 466 | n, err := p.ParseTextFragments(&file) 467 | if test.Err { 468 | if err == nil || err == io.EOF { 469 | t.Fatalf("expected error parsing text fragments, but got %v", err) 470 | } 471 | return 472 | } 473 | if err != nil { 474 | t.Fatalf("error parsing text fragments: %v", err) 475 | } 476 | 477 | if len(test.Fragments) != n { 478 | t.Fatalf("incorrect number of added fragments: expected %d, actual %d", len(test.Fragments), n) 479 | } 480 | 481 | for i, frag := range test.Fragments { 482 | if !reflect.DeepEqual(frag, file.TextFragments[i]) { 483 | t.Errorf("incorrect fragment at position %d\nexpected: %+v\nactual: %+v", i, frag, file.TextFragments[i]) 484 | } 485 | } 486 | }) 487 | } 488 | } 489 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/bluekeyes/go-gitdiff 2 | 3 | go 1.21 4 | --------------------------------------------------------------------------------