├── go.mod
├── testdata
    └── scratch.pack
├── README.md
├── LICENSE
├── git_test.go
├── pkt.go
├── pack.go
├── git.go
└── fs.go


/go.mod:
--------------------------------------------------------------------------------
1 | module rsc.io/gitfs
2 | 
3 | go 1.21.0
4 | 


--------------------------------------------------------------------------------
/testdata/scratch.pack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsc/gitfs/HEAD/testdata/scratch.pack


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Package gitfs presents a file tree downloaded from a remote Git repo as an in-memory fs.FS.
2 | See the [API reference](https://pkg.go.dev/rsc.io/gitfs).
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2009 The Go Authors. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are
 5 | met:
 6 | 
 7 |    * Redistributions of source code must retain the above copyright
 8 | notice, this list of conditions and the following disclaimer.
 9 |    * Redistributions in binary form must reproduce the above
10 | copyright notice, this list of conditions and the following disclaimer
11 | in the documentation and/or other materials provided with the
12 | distribution.
13 |    * Neither the name of Google Inc. nor the names of its
14 | contributors may be used to endorse or promote products derived from
15 | this software without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/git_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package gitfs
 6 | 
 7 | import (
 8 | 	"io/fs"
 9 | 	"io/ioutil"
10 | 	"testing"
11 | )
12 | 
13 | func TestGerrit(t *testing.T) {
14 | 	if testing.Short() {
15 | 		t.Skip("skipping Gerrit network access in -short mode")
16 | 	}
17 | 	r, err := NewRepo("https://go.googlesource.com/scratch")
18 | 	if err != nil {
19 | 		t.Fatal(err)
20 | 	}
21 | 	_, fsys, err := r.Clone("HEAD")
22 | 	if err != nil {
23 | 		t.Fatal(err)
24 | 	}
25 | 	data, err := fs.ReadFile(fsys, "README.md")
26 | 	if err != nil {
27 | 		t.Fatal(err)
28 | 	}
29 | 	t.Log(string(data))
30 | }
31 | 
32 | func TestGitHub(t *testing.T) {
33 | 	if testing.Short() {
34 | 		t.Skip("skipping GitHub network access in -short mode")
35 | 	}
36 | 	r, err := NewRepo("https://github.com/rsc/quote")
37 | 	if err != nil {
38 | 		t.Fatal(err)
39 | 	}
40 | 	_, fsys, err := r.Clone("HEAD")
41 | 	if err != nil {
42 | 		t.Fatal(err)
43 | 	}
44 | 	data, err := fs.ReadFile(fsys, "README.md")
45 | 	if err != nil {
46 | 		t.Fatal(err)
47 | 	}
48 | 	t.Log(string(data))
49 | }
50 | 
51 | func TestPack(t *testing.T) {
52 | 	data, err := ioutil.ReadFile("testdata/scratch.pack")
53 | 	if err != nil {
54 | 		t.Fatal(err)
55 | 	}
56 | 	var s store
57 | 	err = unpack(&s, data)
58 | 	if err != nil {
59 | 		t.Fatal(err)
60 | 	}
61 | 
62 | 	h := Hash{0xf6, 0xf7, 0x39, 0x2a, 0x99, 0x9b, 0x3d, 0x75, 0xe2, 0x1c, 0xae, 0xe3, 0x3a, 0xeb, 0x6d, 0x01, 0x92, 0xe8, 0xdc, 0x6b}
63 | 	tfs, err := s.commit(h)
64 | 	if err != nil {
65 | 		t.Fatal(err)
66 | 	}
67 | 
68 | 	data, err = fs.ReadFile(tfs, "rsc/greeting.go")
69 | 	if err != nil {
70 | 		t.Fatal(err)
71 | 	}
72 | 	println(string(data))
73 | }
74 | 


--------------------------------------------------------------------------------
/pkt.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2021 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package gitfs
  6 | 
  7 | import (
  8 | 	"bufio"
  9 | 	"fmt"
 10 | 	"io"
 11 | 	"strconv"
 12 | 	"strings"
 13 | )
 14 | 
 15 | // A pktLineReader reads Git pkt-line-formatted packets.
 16 | //
 17 | // Each n-byte packet is preceded by a 4-digit hexadecimal length
 18 | // encoding n+4 (the length counts its own bytes), like "0006a\n" for "a\n".
 19 | //
 20 | // A packet starting with 0000 is a so-called flush packet.
 21 | // A packet starting with 0001 is a delimiting marker,
 22 | // which usually marks the end of a sequence in the stream.
 23 | //
 24 | // See https://git-scm.com/docs/protocol-common#_pkt_line_format
 25 | // for the official documentation, although it fails to mention the 0001 packets.
 26 | type pktLineReader struct {
 27 | 	b    *bufio.Reader
 28 | 	size [4]byte
 29 | }
 30 | 
 31 | // newPktLineReader returns a new pktLineReader reading from r.
 32 | func newPktLineReader(r io.Reader) *pktLineReader {
 33 | 	return &pktLineReader{b: bufio.NewReader(r)}
 34 | }
 35 | 
 36 | // Next returns the payload of the next packet from the stream.
 37 | // If the next packet is a flush packet (length 0000), Next returns nil, io.EOF.
 38 | // If the next packet is a delimiter packet (length 0001), Next returns nil, nil.
 39 | // If the data stream has ended, Next returns nil, io.ErrUnexpectedEOF.
 40 | func (r *pktLineReader) Next() ([]byte, error) {
 41 | 	_, err := io.ReadFull(r.b, r.size[:])
 42 | 	if err != nil {
 43 | 		if err == io.EOF {
 44 | 			err = io.ErrUnexpectedEOF
 45 | 		}
 46 | 		return nil, err
 47 | 	}
 48 | 	n, err := strconv.ParseUint(string(r.size[:]), 16, 0)
 49 | 	if err != nil || n == 2 || n == 3 {
 50 | 		return nil, fmt.Errorf("malformed pkt-line")
 51 | 	}
 52 | 	if n == 1 {
 53 | 		return nil, nil // delimiter
 54 | 	}
 55 | 	if n == 0 {
 56 | 		return nil, io.EOF
 57 | 	}
 58 | 	buf := make([]byte, n-4)
 59 | 	_, err = io.ReadFull(r.b, buf)
 60 | 	if err != nil {
 61 | 		if err == io.EOF {
 62 | 			err = io.ErrUnexpectedEOF
 63 | 		}
 64 | 		return nil, err
 65 | 	}
 66 | 	return buf, nil
 67 | }
 68 | 
 69 | // Lines reads packets from r until a flush packet.
 70 | // It returns a string for each packet, with any trailing newline trimmed.
 71 | func (r *pktLineReader) Lines() ([]string, error) {
 72 | 	var lines []string
 73 | 	for {
 74 | 		line, err := r.Next()
 75 | 		if err != nil {
 76 | 			if err == io.EOF {
 77 | 				err = nil
 78 | 			}
 79 | 			return lines, err
 80 | 		}
 81 | 		lines = append(lines, strings.TrimSuffix(string(line), "\n"))
 82 | 	}
 83 | }
 84 | 
 85 | // A pktLineWriter writes Git pkt-line-formatted packets.
 86 | // See pktLineReader for a description of the packet format.
 87 | type pktLineWriter struct {
 88 | 	b    *bufio.Writer
 89 | 	size [4]byte
 90 | }
 91 | 
 92 | // newPktLineWriter returns a new pktLineWriter writing to w.
 93 | func newPktLineWriter(w io.Writer) *pktLineWriter {
 94 | 	return &pktLineWriter{b: bufio.NewWriter(w)}
 95 | }
 96 | 
 97 | // writeSize writes a four-digit hexadecimal length packet for n.
 98 | // Typically n is len(data)+4.
 99 | func (w *pktLineWriter) writeSize(n int) {
100 | 	hex := "0123456789abcdef"
101 | 	w.size[0] = hex[n>>12]
102 | 	w.size[1] = hex[(n>>8)&0xf]
103 | 	w.size[2] = hex[(n>>4)&0xf]
104 | 	w.size[3] = hex[(n>>0)&0xf]
105 | 	w.b.Write(w.size[:])
106 | }
107 | 
108 | // Write writes b as a single packet.
109 | func (w *pktLineWriter) Write(b []byte) (int, error) {
110 | 	n := len(b)
111 | 	if n+4 > 0xffff {
112 | 		return 0, fmt.Errorf("write too large")
113 | 	}
114 | 	w.writeSize(n + 4)
115 | 	w.b.Write(b)
116 | 	return n, nil
117 | }
118 | 
119 | // WriteString writes s as a single packet.
120 | func (w *pktLineWriter) WriteString(s string) (int, error) {
121 | 	n := len(s)
122 | 	if n+4 > 0xffff {
123 | 		return 0, fmt.Errorf("write too large")
124 | 	}
125 | 	w.writeSize(n + 4)
126 | 	w.b.WriteString(s)
127 | 	return n, nil
128 | }
129 | 
130 | // Close writes a terminating flush packet
131 | // and flushes buffered data to the underlying writer.
132 | func (w *pktLineWriter) Close() error {
133 | 	w.b.WriteString("0000")
134 | 	w.b.Flush()
135 | 	return nil
136 | }
137 | 
138 | // Delim writes a delimiter packet.
139 | func (w *pktLineWriter) Delim() {
140 | 	w.b.WriteString("0001")
141 | }
142 | 


--------------------------------------------------------------------------------
/pack.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2021 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package gitfs
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"compress/zlib"
 10 | 	"crypto/sha1"
 11 | 	"encoding/binary"
 12 | 	"fmt"
 13 | 	"io/ioutil"
 14 | )
 15 | 
 16 | // unpack parses data, which is a Git pack-formatted archive,
 17 | // writing every object it contains to the store s.
 18 | //
 19 | // See https://git-scm.com/docs/pack-format for format documentation.
 20 | func unpack(s *store, data []byte) error {
 21 | 	// If the store is empty, pre-allocate the length of data.
 22 | 	// This should be about the right order of magnitude for the eventual data,
 23 | 	// avoiding many growing steps during append.
 24 | 	if len(s.data) == 0 {
 25 | 		s.data = make([]byte, 0, len(data))
 26 | 	}
 27 | 
 28 | 	// Pack data starts with 12-byte header: "PACK" version[4] nobj[4].
 29 | 	if len(data) < 12+20 {
 30 | 		return fmt.Errorf("malformed git pack: too short")
 31 | 	}
 32 | 	hdr := data[:12]
 33 | 	vers := binary.BigEndian.Uint32(hdr[4:8])
 34 | 	nobj := binary.BigEndian.Uint32(hdr[8:12])
 35 | 	if string(hdr[:4]) != "PACK" || vers != 2 && vers != 3 || len(data) < 12+20 || int64(nobj) >= int64(len(data)) {
 36 | 		return fmt.Errorf("malformed git pack")
 37 | 	}
 38 | 	if vers == 3 {
 39 | 		return fmt.Errorf("cannot read git pack v3")
 40 | 	}
 41 | 
 42 | 	// Pack data ends with SHA1 of the entire pack.
 43 | 	sum := sha1.Sum(data[:len(data)-20])
 44 | 	if !bytes.Equal(sum[:], data[len(data)-20:]) {
 45 | 		return fmt.Errorf("malformed git pack: bad checksum")
 46 | 	}
 47 | 
 48 | 	// Object data is everything between hdr and ending SHA1.
 49 | 	// Unpack every object into the store.
 50 | 	objs := data[12 : len(data)-20]
 51 | 	off := 0
 52 | 	for i := 0; i < int(nobj); i++ {
 53 | 		_, _, _, encSize, err := unpackObject(s, objs, off)
 54 | 		if err != nil {
 55 | 			return fmt.Errorf("unpack: malformed git pack: %v", err)
 56 | 		}
 57 | 		off += encSize
 58 | 	}
 59 | 	if off != len(objs) {
 60 | 		return fmt.Errorf("malformed git pack: junk after objects")
 61 | 	}
 62 | 	return nil
 63 | }
 64 | 
 65 | // unpackObject unpacks the object at objs[off:] and writes it to the store s.
 66 | // It returns the type, hash, and content of the object, as well as the encoded size,
 67 | // meaning the number of bytes at the start of objs[off:] that this record occupies.
 68 | func unpackObject(s *store, objs []byte, off int) (typ objType, h Hash, content []byte, encSize int, err error) {
 69 | 	fail := func(err error) (objType, Hash, []byte, int, error) {
 70 | 		return 0, Hash{}, nil, 0, err
 71 | 	}
 72 | 	if off < 0 || off >= len(objs) {
 73 | 		return fail(fmt.Errorf("invalid object offset"))
 74 | 	}
 75 | 
 76 | 	// Object starts with varint-encoded type and length n.
 77 | 	// (The length n is the length of the compressed data that follows,
 78 | 	// not the length of the actual object.)
 79 | 	u, size := binary.Uvarint(objs[off:])
 80 | 	if size <= 0 {
 81 | 		return fail(fmt.Errorf("invalid object: bad varint header"))
 82 | 	}
 83 | 	typ = objType((u >> 4) & 7)
 84 | 	n := int(u&15 | u>>7<<4)
 85 | 
 86 | 	// Git often stores objects that differ very little (different revs of a file).
 87 | 	// It can save space by encoding one as "start with this other object and apply these diffs".
 88 | 	// There are two ways to specify "this other object": an object ref (20-byte SHA1)
 89 | 	// or as a relative offset to an earlier position in the objs slice.
 90 | 	// For either of these, we need to fetch the other object's type and data (deltaTyp and deltaBase).
 91 | 	// The Git docs call this the "deltified representation".
 92 | 	var deltaTyp objType
 93 | 	var deltaBase []byte
 94 | 	switch typ {
 95 | 	case objRefDelta:
 96 | 		if len(objs)-(off+size) < 20 {
 97 | 			return fail(fmt.Errorf("invalid object: bad delta ref"))
 98 | 		}
 99 | 		// Base block identified by SHA1 of an already unpacked hash.
100 | 		var h Hash
101 | 		copy(h[:], objs[off+size:])
102 | 		size += 20
103 | 		deltaTyp, deltaBase = s.object(h)
104 | 		if deltaTyp == 0 {
105 | 			return fail(fmt.Errorf("invalid object: unknown delta ref %v", h))
106 | 		}
107 | 
108 | 	case objOfsDelta:
109 | 		i := off + size
110 | 		if len(objs)-i < 20 {
111 | 			return fail(fmt.Errorf("invalid object: too short"))
112 | 		}
113 | 		// Base block identified by relative offset to earlier position in objs,
114 | 		// using a varint-like but not-quite-varint encoding.
115 | 		// Look for "offset encoding:" in https://git-scm.com/docs/pack-format.
116 | 		d := int64(objs[i] & 0x7f)
117 | 		for objs[i]&0x80 != 0 {
118 | 			i++
119 | 			if i-(off+size) > 10 {
120 | 				return fail(fmt.Errorf("invalid object: malformed delta offset"))
121 | 			}
122 | 			d = d<<7 | int64(objs[i]&0x7f)
123 | 			d += 1 << 7
124 | 		}
125 | 		i++
126 | 		size = i - off
127 | 
128 | 		// Re-unpack the object at the earlier offset to find its type and content.
129 | 		if d == 0 || d > int64(off) {
130 | 			return fail(fmt.Errorf("invalid object: bad delta offset"))
131 | 		}
132 | 		var err error
133 | 		deltaTyp, _, deltaBase, _, err = unpackObject(s, objs, off-int(d))
134 | 		if err != nil {
135 | 			return fail(fmt.Errorf("invalid object: bad delta offset"))
136 | 		}
137 | 	}
138 | 
139 | 	// The main encoded data is a zlib-compressed stream.
140 | 	br := bytes.NewReader(objs[off+size:])
141 | 	zr, err := zlib.NewReader(br)
142 | 	if err != nil {
143 | 		return fail(fmt.Errorf("invalid object deflate: %v", err))
144 | 	}
145 | 	data, err := ioutil.ReadAll(zr)
146 | 	if err != nil {
147 | 		return fail(fmt.Errorf("invalid object: bad deflate: %v", err))
148 | 	}
149 | 	if len(data) != n {
150 | 		return fail(fmt.Errorf("invalid object: deflate size %d != %d", len(data), n))
151 | 	}
152 | 	encSize = len(objs[off:]) - br.Len()
153 | 
154 | 	// If we fetched a base object above, the stream is an encoded delta.
155 | 	// Otherwise it is the raw data.
156 | 	switch typ {
157 | 	default:
158 | 		return fail(fmt.Errorf("invalid object: unknown object type"))
159 | 	case objCommit, objTree, objBlob, objTag:
160 | 		// ok
161 | 	case objRefDelta, objOfsDelta:
162 | 		// Actual object type is the type of the base object.
163 | 		typ = deltaTyp
164 | 
165 | 		// Delta encoding starts with size of base object and size of new object.
166 | 		baseSize, s := binary.Uvarint(data)
167 | 		data = data[s:]
168 | 		if baseSize != uint64(len(deltaBase)) {
169 | 			return fail(fmt.Errorf("invalid object: mismatched delta src size"))
170 | 		}
171 | 		targSize, s := binary.Uvarint(data)
172 | 		data = data[s:]
173 | 
174 | 		// Apply delta to base object, producing new object.
175 | 		targ := make([]byte, targSize)
176 | 		if err := applyDelta(targ, deltaBase, data); err != nil {
177 | 			return fail(fmt.Errorf("invalid object: %v", err))
178 | 		}
179 | 		data = targ
180 | 	}
181 | 
182 | 	h, data = s.add(typ, data)
183 | 	return typ, h, data, encSize, nil
184 | }
185 | 
// applyDelta applies the delta encoding to src, producing dst,
// which has already been allocated to the expected final size.
// See https://git-scm.com/docs/pack-format#_deltified_representation for docs.
//
// The delta is a sequence of commands, each either "copy a range of src"
// or "insert these literal bytes". applyDelta returns an error if a command
// is truncated, refers to bytes outside src, or if the commands produce
// more or fewer bytes than len(dst). It never panics on malformed input.
// On error, dst may have been partially written.
func applyDelta(dst, src, delta []byte) error {
	for len(delta) > 0 {
		// Command byte says what comes next.
		cmd := delta[0]
		delta = delta[1:]
		switch {
		case cmd == 0:
			// cmd == 0 is reserved.
			return fmt.Errorf("invalid delta cmd")

		case cmd&0x80 != 0:
			// Copy from base object, 4-byte offset, 3-byte size.
			// But any zero byte in the offset or size can be omitted.
			// The bottom 7 bits of cmd say which offset/size bytes are present.
			var off, size int64
			for i := uint(0); i < 4; i++ {
				if cmd&(1<<i) != 0 {
					if len(delta) == 0 {
						return fmt.Errorf("delta encoding truncated")
					}
					off |= int64(delta[0]) << (8 * i)
					delta = delta[1:]
				}
			}
			for i := uint(0); i < 3; i++ {
				if cmd&(0x10<<i) != 0 {
					if len(delta) == 0 {
						return fmt.Errorf("delta encoding truncated")
					}
					size |= int64(delta[0]) << (8 * i)
					delta = delta[1:]
				}
			}
			// Size 0 means size 0x10000 for some reason. (!)
			if size == 0 {
				size = 0x10000
			}
			// Written as two comparisons so that off+size cannot overflow.
			if off > int64(len(src)) || size > int64(len(src))-off {
				return fmt.Errorf("delta copy outside base object")
			}
			if size > int64(len(dst)) {
				return fmt.Errorf("delta encoding too long")
			}
			copy(dst[:size], src[off:off+size])
			dst = dst[size:]

		default:
			// Up to 0x7F bytes of literal data, length in bottom 7 bits of cmd.
			n := int(cmd)
			if n > len(delta) {
				return fmt.Errorf("delta encoding truncated")
			}
			if n > len(dst) {
				return fmt.Errorf("delta encoding too long")
			}
			copy(dst[:n], delta[:n])
			dst = dst[n:]
			delta = delta[n:]
		}
	}
	if len(dst) != 0 {
		return fmt.Errorf("delta encoding too short")
	}
	return nil
}
236 | 


--------------------------------------------------------------------------------
/git.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2021 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | // Package gitfs presents a file tree downloaded from a remote Git repo as an in-memory fs.FS.
  6 | package gitfs
  7 | 
  8 | import (
  9 | 	"bytes"
 10 | 	"encoding/hex"
 11 | 	"fmt"
 12 | 	"io"
 13 | 	"io/fs"
 14 | 	"io/ioutil"
 15 | 	"net/http"
 16 | 	"strings"
 17 | )
 18 | 
// A Repo is a connection to a remote repository served over HTTP or HTTPS.
// Use NewRepo to create one; NewRepo runs the handshake that fills in caps.
type Repo struct {
	url  string            // trailing slash removed
	caps map[string]string // server capability lines from the handshake, split at the first "="
}
 24 | 
 25 | // NewRepo connects to a Git repository at the given http:// or https:// URL.
 26 | func NewRepo(url string) (*Repo, error) {
 27 | 	r := &Repo{url: strings.TrimSuffix(url, "/")}
 28 | 	if err := r.handshake(); err != nil {
 29 | 		return nil, err
 30 | 	}
 31 | 	return r, nil
 32 | }
 33 | 
 34 | // handshake runs the initial Git opening handshake, learning the capabilities of the server.
 35 | // See https://git-scm.com/docs/protocol-v2#_initial_client_request.
 36 | func (r *Repo) handshake() error {
 37 | 	req, _ := http.NewRequest("GET", r.url+"/info/refs?service=git-upload-pack", nil)
 38 | 	req.Header.Set("Accept", "*/*")
 39 | 	req.Header.Set("Git-Protocol", "version=2")
 40 | 
 41 | 	resp, err := http.DefaultClient.Do(req)
 42 | 	if err != nil {
 43 | 		return fmt.Errorf("handshake: %v", err)
 44 | 	}
 45 | 	data, err := ioutil.ReadAll(resp.Body)
 46 | 	if resp.StatusCode != 200 {
 47 | 		return fmt.Errorf("handshake: %v\n%s", resp.Status, data)
 48 | 	}
 49 | 	if err != nil {
 50 | 		return fmt.Errorf("handshake: reading body: %v", err)
 51 | 	}
 52 | 	if ct := resp.Header.Get("Content-Type"); ct != "application/x-git-upload-pack-advertisement" {
 53 | 		return fmt.Errorf("handshake: invalid response Content-Type: %v", ct)
 54 | 	}
 55 | 
 56 | 	pr := newPktLineReader(bytes.NewReader(data))
 57 | 	lines, err := pr.Lines()
 58 | 	if len(lines) == 1 && lines[0] == "# service=git-upload-pack" {
 59 | 		lines, err = pr.Lines()
 60 | 	}
 61 | 	if err != nil {
 62 | 		return fmt.Errorf("handshake: parsing response: %v", err)
 63 | 	}
 64 | 	caps := make(map[string]string)
 65 | 	for _, line := range lines {
 66 | 		verb, args, _ := strings.Cut(line, "=")
 67 | 		caps[verb] = args
 68 | 	}
 69 | 	if _, ok := caps["version 2"]; !ok {
 70 | 		return fmt.Errorf("handshake: not version 2: %q", lines)
 71 | 	}
 72 | 	r.caps = caps
 73 | 	return nil
 74 | }
 75 | 
 76 | // Resolve looks up the given ref and returns the corresponding Hash.
 77 | func (r *Repo) Resolve(ref string) (Hash, error) {
 78 | 	if h, err := parseHash(ref); err == nil {
 79 | 		return h, nil
 80 | 	}
 81 | 
 82 | 	fail := func(err error) (Hash, error) {
 83 | 		return Hash{}, fmt.Errorf("resolve %s: %v", ref, err)
 84 | 	}
 85 | 	refs, err := r.refs(ref)
 86 | 	if err != nil {
 87 | 		return fail(err)
 88 | 	}
 89 | 	for _, known := range refs {
 90 | 		if known.name == ref {
 91 | 			return known.hash, nil
 92 | 		}
 93 | 	}
 94 | 	return fail(fmt.Errorf("unknown ref"))
 95 | }
 96 | 
// A ref is a single Git reference, like refs/heads/main, refs/tags/v1.0.0, or HEAD.
type ref struct {
	name string // "refs/heads/main", "refs/tags/v1.0.0", "HEAD"
	hash Hash   // hash reported for the ref, parsed from its hexadecimal text
}
102 | 
103 | // refs executes an ls-refs command on the remote server
104 | // to look up refs with the given prefixes.
105 | // See https://git-scm.com/docs/protocol-v2#_ls_refs.
106 | func (r *Repo) refs(prefixes ...string) ([]ref, error) {
107 | 	if _, ok := r.caps["ls-refs"]; !ok {
108 | 		return nil, fmt.Errorf("refs: server does not support ls-refs")
109 | 	}
110 | 
111 | 	var buf bytes.Buffer
112 | 	pw := newPktLineWriter(&buf)
113 | 	pw.WriteString("command=ls-refs")
114 | 	pw.Delim()
115 | 	pw.WriteString("peel")
116 | 	pw.WriteString("symrefs")
117 | 	for _, prefix := range prefixes {
118 | 		pw.WriteString("ref-prefix " + prefix)
119 | 	}
120 | 	pw.Close()
121 | 	postbody := buf.Bytes()
122 | 
123 | 	req, _ := http.NewRequest("POST", r.url+"/git-upload-pack", &buf)
124 | 	req.Header.Set("Content-Type", "application/x-git-upload-pack-request")
125 | 	req.Header.Set("Accept", "application/x-git-upload-pack-result")
126 | 	req.Header.Set("Git-Protocol", "version=2")
127 | 
128 | 	resp, err := http.DefaultClient.Do(req)
129 | 	if err != nil {
130 | 		return nil, fmt.Errorf("refs: %v", err)
131 | 	}
132 | 	defer resp.Body.Close()
133 | 	data, err := ioutil.ReadAll(resp.Body)
134 | 	if resp.StatusCode != 200 {
135 | 		return nil, fmt.Errorf("refs: %v\n%s", resp.Status, data)
136 | 	}
137 | 	if err != nil {
138 | 		return nil, fmt.Errorf("refs: reading body: %v", err)
139 | 	}
140 | 	if ct := resp.Header.Get("Content-Type"); ct != "application/x-git-upload-pack-result" {
141 | 		return nil, fmt.Errorf("refs: invalid response Content-Type: %v", ct)
142 | 	}
143 | 
144 | 	var refs []ref
145 | 	lines, err := newPktLineReader(bytes.NewReader(data)).Lines()
146 | 	if err != nil {
147 | 		return nil, fmt.Errorf("refs: parsing response: %v %d\n%s\n%s", err, len(data), hex.Dump(postbody), hex.Dump(data))
148 | 	}
149 | 	for _, line := range lines {
150 | 		hash, rest, ok := strings.Cut(line, " ")
151 | 		if !ok {
152 | 			return nil, fmt.Errorf("refs: parsing response: invalid line: %q", line)
153 | 		}
154 | 		h, err := parseHash(hash)
155 | 		if err != nil {
156 | 			return nil, fmt.Errorf("refs: parsing response: invalid line: %q", line)
157 | 		}
158 | 		name, _, _ := strings.Cut(rest, " ")
159 | 		refs = append(refs, ref{hash: h, name: name})
160 | 	}
161 | 	return refs, nil
162 | }
163 | 
164 | // Clone resolves the given ref to a hash and returns the corresponding fs.FS.
165 | func (r *Repo) Clone(ref string) (Hash, fs.FS, error) {
166 | 	fail := func(err error) (Hash, fs.FS, error) {
167 | 		return Hash{}, nil, fmt.Errorf("clone %s: %v", ref, err)
168 | 	}
169 | 	h, err := r.Resolve(ref)
170 | 	if err != nil {
171 | 		return fail(err)
172 | 	}
173 | 	tfs, err := r.fetch(h)
174 | 	if err != nil {
175 | 		return fail(err)
176 | 	}
177 | 	return h, tfs, nil
178 | }
179 | 
180 | // CloneHash returns the fs.FS for the given hash.
181 | func (r *Repo) CloneHash(h Hash) (fs.FS, error) {
182 | 	tfs, err := r.fetch(h)
183 | 	if err != nil {
184 | 		return nil, fmt.Errorf("clone %s: %v", h, err)
185 | 	}
186 | 	return tfs, nil
187 | }
188 | 
189 | // fetch returns the fs.FS for a given hash.
190 | func (r *Repo) fetch(h Hash) (fs.FS, error) {
191 | 	// Fetch a shallow packfile from the remote server.
192 | 	// Shallow means it only contains the tree at that one commit,
193 | 	// not the entire history of the repo.
194 | 	// See https://git-scm.com/docs/protocol-v2#_fetch.
195 | 	opts, ok := r.caps["fetch"]
196 | 	if !ok {
197 | 		return nil, fmt.Errorf("fetch: server does not support fetch")
198 | 	}
199 | 	if !strings.Contains(" "+opts+" ", " shallow ") {
200 | 		return nil, fmt.Errorf("fetch: server does not support shallow fetch")
201 | 	}
202 | 
203 | 	// Prepare and send request for pack file.
204 | 	var buf bytes.Buffer
205 | 	pw := newPktLineWriter(&buf)
206 | 	pw.WriteString("command=fetch")
207 | 	pw.Delim()
208 | 	pw.WriteString("deepen 1")
209 | 	pw.WriteString("want " + h.String())
210 | 	pw.WriteString("done")
211 | 	pw.Close()
212 | 	postbody := buf.Bytes()
213 | 
214 | 	req, _ := http.NewRequest("POST", r.url+"/git-upload-pack", &buf)
215 | 	req.Header.Set("Content-Type", "application/x-git-upload-pack-request")
216 | 	req.Header.Set("Accept", "application/x-git-upload-pack-result")
217 | 	req.Header.Set("Git-Protocol", "version=2")
218 | 
219 | 	resp, err := http.DefaultClient.Do(req)
220 | 	if err != nil {
221 | 		return nil, fmt.Errorf("fetch: %v", err)
222 | 	}
223 | 	defer resp.Body.Close()
224 | 	if resp.StatusCode != 200 {
225 | 		data, _ := ioutil.ReadAll(resp.Body)
226 | 		return nil, fmt.Errorf("fetch: %v\n%s\n%s", resp.Status, data, hex.Dump(postbody))
227 | 	}
228 | 	if ct := resp.Header.Get("Content-Type"); ct != "application/x-git-upload-pack-result" {
229 | 		return nil, fmt.Errorf("fetch: invalid response Content-Type: %v", ct)
230 | 	}
231 | 
232 | 	// Response is sequence of pkt-line packets.
233 | 	// It is plain text output (printed by git) until we find "packfile".
234 | 	// Then it switches to packets with a single prefix byte saying
235 | 	// what kind of data is in that packet:
236 | 	// 1 for pack file data, 2 for text output, 3 for errors.
237 | 	var data []byte
238 | 	pr := newPktLineReader(resp.Body)
239 | 	sawPackfile := false
240 | 	for {
241 | 		line, err := pr.Next()
242 | 		if err != nil {
243 | 			if err == io.EOF {
244 | 				break
245 | 			}
246 | 			return nil, fmt.Errorf("fetch: parsing response: %v", err)
247 | 		}
248 | 		if line == nil { // ignore delimiter
249 | 			continue
250 | 		}
251 | 		if !sawPackfile {
252 | 			// Discard response lines until we get to packfile start.
253 | 			if strings.TrimSuffix(string(line), "\n") == "packfile" {
254 | 				sawPackfile = true
255 | 			}
256 | 			continue
257 | 		}
258 | 		if len(line) == 0 || line[0] == 0 || line[0] > 3 {
259 | 			fmt.Printf("%q\n", line)
260 | 			continue
261 | 			return nil, fmt.Errorf("fetch: malformed response: invalid sideband: %q", line)
262 | 		}
263 | 		switch line[0] {
264 | 		case 1:
265 | 			data = append(data, line[1:]...)
266 | 		case 2:
267 | 			fmt.Printf("%s\n", line[1:])
268 | 		case 3:
269 | 			return nil, fmt.Errorf("fetch: server error: %s", line[1:])
270 | 		}
271 | 	}
272 | 
273 | 	if !bytes.HasPrefix(data, []byte("PACK")) {
274 | 		return nil, fmt.Errorf("fetch: malformed response: not packfile")
275 | 	}
276 | 
277 | 	// Unpack pack file and return fs.FS for the commit we downloaded.
278 | 	var s store
279 | 	if err := unpack(&s, data); err != nil {
280 | 		return nil, fmt.Errorf("fetch: %v", err)
281 | 	}
282 | 	tfs, err := s.commit(h)
283 | 	if err != nil {
284 | 		return nil, fmt.Errorf("fetch: %v", err)
285 | 	}
286 | 	return tfs, nil
287 | }
288 | 


--------------------------------------------------------------------------------
/fs.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2021 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package gitfs
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"crypto/sha1"
 10 | 	"encoding/hex"
 11 | 	"fmt"
 12 | 	hashpkg "hash"
 13 | 	"io"
 14 | 	"io/fs"
 15 | 	"runtime/debug"
 16 | 	"time"
 17 | )
 18 | 
 19 | // A Hash is a SHA-1 Hash identifying a particular Git object.
 20 | type Hash [20]byte
 21 | 
 22 | func (h Hash) String() string { return fmt.Sprintf("%x", h[:]) }
 23 | 
 24 | // parseHash parses the (full-length) Git hash text.
 25 | func parseHash(text string) (Hash, error) {
 26 | 	x, err := hex.DecodeString(text)
 27 | 	if err != nil || len(x) != 20 {
 28 | 		return Hash{}, fmt.Errorf("invalid hash")
 29 | 	}
 30 | 	var h Hash
 31 | 	copy(h[:], x)
 32 | 	return h, nil
 33 | }
 34 | 
 35 | // An objType is an object type indicator.
 36 | // The values are the ones used in Git pack encoding
 37 | // (https://git-scm.com/docs/pack-format#_object_types).
 38 | type objType int
 39 | 
 40 | const (
 41 | 	objNone   objType = 0
 42 | 	objCommit objType = 1
 43 | 	objTree   objType = 2
 44 | 	objBlob   objType = 3
 45 | 	objTag    objType = 4
 46 | 	// 5 undefined
 47 | 	objOfsDelta objType = 6
 48 | 	objRefDelta objType = 7
 49 | )
 50 | 
 51 | var objTypes = [...]string{
 52 | 	objCommit: "commit",
 53 | 	objTree:   "tree",
 54 | 	objBlob:   "blob",
 55 | 	objTag:    "tag",
 56 | }
 57 | 
 58 | func (t objType) String() string {
 59 | 	if t < 0 || int(t) >= len(objTypes) || objTypes[t] == "" {
 60 | 		return fmt.Sprintf("objType(%d)", int(t))
 61 | 	}
 62 | 	return objTypes[t]
 63 | }
 64 | 
 65 | // A dirEntry is a Git directory entry parsed from a tree object.
 66 | type dirEntry struct {
 67 | 	mode int
 68 | 	name []byte
 69 | 	hash Hash
 70 | }
 71 | 
 72 | // parseDirEntry parses the next directory entry from data,
 73 | // returning the entry and the number of bytes it occupied.
 74 | // If data is malformed, parseDirEntry returns dirEntry{}, 0.
 75 | func parseDirEntry(data []byte) (dirEntry, int) {
 76 | 	// Unclear where or if this format is documented by Git.
 77 | 	// Each directory entry is an octal mode, then a space,
 78 | 	// then a file name, then a NUL byte, then a 20-byte binary hash.
 79 | 	// Note that 'git cat-file -p <treehash>' shows a textual representation
 80 | 	// of this data, not the actual binary data. To see the binary data,
 81 | 	// use 'echo <treehash> | git cat-file --batch | hexdump -C'.
 82 | 	mode := 0
 83 | 	i := 0
 84 | 	for i < len(data) && data[i] != ' ' {
 85 | 		c := data[i]
 86 | 		if c < '0' || '7' < c {
 87 | 			return dirEntry{}, 0
 88 | 		}
 89 | 		mode = mode*8 + int(c) - '0'
 90 | 		i++
 91 | 	}
 92 | 	i++
 93 | 	j := i
 94 | 	for j < len(data) && data[j] != 0 {
 95 | 		j++
 96 | 	}
 97 | 	if len(data)-j < 1+20 {
 98 | 		return dirEntry{}, 0
 99 | 	}
100 | 	name := data[i:j]
101 | 	var h Hash
102 | 	copy(h[:], data[j+1:])
103 | 	return dirEntry{mode, name, h}, j + 1 + 20
104 | }
105 | 
106 | // treeLookup looks in the tree object data for the directory entry with the given name,
107 | // returning the mode and hash associated with the name.
108 | func treeLookup(data []byte, name string) (mode int, h Hash, ok bool) {
109 | 	// Note: The tree object directory entries are sorted by name,
110 | 	// but the directory entry data is not self-synchronizing,
111 | 	// so it's not possible to be clever and use a binary search here.
112 | 	for len(data) > 0 {
113 | 		e, size := parseDirEntry(data)
114 | 		if size == 0 {
115 | 			break
116 | 		}
117 | 		if string(e.name) == name {
118 | 			return e.mode, e.hash, true
119 | 		}
120 | 		data = data[size:]
121 | 	}
122 | 	return 0, Hash{}, false
123 | }
124 | 
// commitKeyValue parses the commit object data
// looking for the first header line "key value" matching the given key.
// It returns the associated value: the rest of that line, without the
// trailing newline, as a subslice of data.
// Only the header is searched; the scan stops at the first blank line,
// so text in the commit message is never mistaken for a header.
// If no header line has the given key, commitKeyValue returns nil, false.
// (Try 'git cat-file -p <commithash>' to see the commit data format.)
func commitKeyValue(data []byte, key string) ([]byte, bool) {
	for len(data) > 0 && data[0] != '\n' {
		// Split off the next line; the final line may lack a newline.
		line, rest := data, []byte(nil)
		if nl := bytes.IndexByte(data, '\n'); nl >= 0 {
			line, rest = data[:nl], data[nl+1:]
		}
		// Compare against this line, not the start of the whole object,
		// and require the separating space so that a key which is only
		// a prefix of the line's key does not match.
		if len(line) > len(key) && line[len(key)] == ' ' && string(line[:len(key)]) == key {
			return line[len(key)+1:], true
		}
		data = rest
	}
	return nil, false
}
149 | 
// A store is a collection of Git objects, indexed for lookup by hash.
// The zero store is empty and ready to use: add allocates sha1 and index
// the first time it needs them.
type store struct {
	sha1  hashpkg.Hash    // reused hash state
	index map[Hash]stored // lookup index
	data  []byte          // concatenation of all object data
}
156 | 
// A stored describes a single stored object:
// its type and the location of its data within store.data.
type stored struct {
	typ objType // object type
	off int     // object data is store.data[off:off+len]
	// len is the length of the object data in bytes.
	len int
}
163 | 
164 | // add adds an object with the given type and content to s, returning its Hash.
165 | // If the object is already stored in s, add succeeds but doesn't store a second copy.
166 | func (s *store) add(typ objType, data []byte) (Hash, []byte) {
167 | 	if s.sha1 == nil {
168 | 		s.sha1 = sha1.New()
169 | 	}
170 | 
171 | 	// Compute Git hash for object.
172 | 	s.sha1.Reset()
173 | 	fmt.Fprintf(s.sha1, "%s %d\x00", typ, len(data))
174 | 	s.sha1.Write(data)
175 | 	var h Hash
176 | 	s.sha1.Sum(h[:0]) // appends into h
177 | 
178 | 	e, ok := s.index[h]
179 | 	if !ok {
180 | 		if s.index == nil {
181 | 			s.index = make(map[Hash]stored)
182 | 		}
183 | 		e = stored{typ, len(s.data), len(data)}
184 | 		s.index[h] = e
185 | 		s.data = append(s.data, data...)
186 | 	}
187 | 	return h, s.data[e.off : e.off+e.len]
188 | }
189 | 
190 | // object returns the type and data for the object with hash h.
191 | // If there is no object with hash h, object returns 0, nil.
192 | func (s *store) object(h Hash) (typ objType, data []byte) {
193 | 	d, ok := s.index[h]
194 | 	if !ok {
195 | 		return 0, nil
196 | 	}
197 | 	return d.typ, s.data[d.off : d.off+d.len]
198 | }
199 | 
200 | // commit returns a treeFS for the file system tree associated with the given commit hash.
201 | func (s *store) commit(h Hash) (*treeFS, error) {
202 | 	// The commit object data starts with key-value pairs
203 | 	typ, data := s.object(h)
204 | 	if typ == objNone {
205 | 		return nil, fmt.Errorf("commit %s: no such hash", h)
206 | 	}
207 | 	if typ != objCommit {
208 | 		return nil, fmt.Errorf("commit %s: unexpected type %s", h, typ)
209 | 	}
210 | 	treeHash, ok := commitKeyValue(data, "tree")
211 | 	if !ok {
212 | 		return nil, fmt.Errorf("commit %s: no tree", h)
213 | 	}
214 | 	h, err := parseHash(string(treeHash))
215 | 	if err != nil {
216 | 		return nil, fmt.Errorf("commit %s: invalid tree %q", h, treeHash)
217 | 	}
218 | 	return &treeFS{s, h}, nil
219 | }
220 | 
// A treeFS is an fs.FS serving a Git file system tree rooted at a given tree object hash.
type treeFS struct {
	// s holds the objects that Open looks up while walking the tree.
	s    *store
	tree Hash // root tree
}
226 | 
227 | // Open opens the given file or directory, implementing the fs.FS Open method.
228 | func (t *treeFS) Open(name string) (f fs.File, err error) {
229 | 	defer func() {
230 | 		if e := recover(); e != nil {
231 | 			f = nil
232 | 			err = fmt.Errorf("gitfs panic: %v\n%s", e, debug.Stack())
233 | 		}
234 | 	}()
235 | 
236 | 	// Process each element in the slash-separated path, producing hash identified by name.
237 | 	h := t.tree
238 | 	start := 0 // index of start of final path element in name
239 | 	if name != "." {
240 | 		for i := 0; i <= len(name); i++ {
241 | 			if i == len(name) || name[i] == '/' {
242 | 				// Look up name in current tree object h.
243 | 				typ, data := t.s.object(h)
244 | 				if typ != objTree {
245 | 					return nil, &fs.PathError{Path: name, Op: "open", Err: fs.ErrNotExist}
246 | 				}
247 | 				_, th, ok := treeLookup(data, name[start:i])
248 | 				if !ok {
249 | 					return nil, &fs.PathError{Path: name, Op: "open", Err: fs.ErrNotExist}
250 | 				}
251 | 				h = th
252 | 				if i < len(name) {
253 | 					start = i + 1
254 | 				}
255 | 			}
256 | 		}
257 | 	}
258 | 
259 | 	// The hash h is the hash for name. Load its object.
260 | 	typ, data := t.s.object(h)
261 | 	info := fileInfo{name, name[start:], 0, 0}
262 | 	if typ == objBlob {
263 | 		// Regular file.
264 | 		info.mode = 0444
265 | 		info.size = int64(len(data))
266 | 		return &blobFile{info, bytes.NewReader(data)}, nil
267 | 	}
268 | 	if typ == objTree {
269 | 		// Directory.
270 | 		info.mode = fs.ModeDir | 0555
271 | 		return &dirFile{t.s, info, data, 0}, nil
272 | 	}
273 | 	return nil, &fs.PathError{Path: name, Op: "open", Err: fmt.Errorf("unexpected git object type %s", typ)}
274 | }
275 | 
// fileInfo is the metadata for one file or directory in a tree.
// It implements fs.FileInfo, and also fs.DirEntry so that the same value
// can be returned from ReadDir.
type fileInfo struct {
	path string      // path reported in errors
	name string      // final path element
	mode fs.FileMode // type and permission bits
	size int64       // length of the content in bytes
}

// Name returns the final path element.
func (fi *fileInfo) Name() string {
	return fi.name
}

// Size returns the recorded content length in bytes.
func (fi *fileInfo) Size() int64 {
	return fi.size
}

// Mode returns the full mode: type bits and permission bits.
func (fi *fileInfo) Mode() fs.FileMode {
	return fi.mode
}

// Type returns only the type bits of the mode.
func (fi *fileInfo) Type() fs.FileMode {
	return fi.mode & fs.ModeType
}

// IsDir reports whether the mode has the directory bit set.
func (fi *fileInfo) IsDir() bool {
	return fi.mode&fs.ModeDir != 0
}

// ModTime always returns the zero time; no modification time is recorded.
func (fi *fileInfo) ModTime() time.Time {
	return time.Time{}
}

// Sys always returns nil; there is no underlying data source to expose.
func (fi *fileInfo) Sys() any {
	return nil
}

// Info returns fi itself, which lets a fileInfo serve as an fs.DirEntry.
func (fi *fileInfo) Info() (fs.FileInfo, error) {
	return fi, nil
}

// err wraps err in an *fs.PathError for the operation op on this file's path.
func (fi *fileInfo) err(op string, err error) error {
	return &fs.PathError{Path: fi.path, Op: op, Err: err}
}
296 | 
297 | // A blobFile implements fs.File for a regular file.
298 | // The embedded bytes.Reader provides Read, Seek and other I/O methods.
299 | type blobFile struct {
300 | 	info fileInfo
301 | 	*bytes.Reader
302 | }
303 | 
304 | func (f *blobFile) Close() error               { return nil }
305 | func (f *blobFile) Stat() (fs.FileInfo, error) { return &f.info, nil }
306 | 
307 | // A dirFile implements fs.File for a directory.
308 | type dirFile struct {
309 | 	s    *store
310 | 	info fileInfo
311 | 	data []byte
312 | 	off  int
313 | }
314 | 
315 | func (f *dirFile) Close() error               { return nil }
316 | func (f *dirFile) Read([]byte) (int, error)   { return 0, f.info.err("read", fs.ErrInvalid) }
317 | func (f *dirFile) Stat() (fs.FileInfo, error) { return &f.info, nil }
318 | 
319 | func (f *dirFile) Seek(offset int64, whence int) (int64, error) {
320 | 	if offset == 0 && whence == 0 {
321 | 		// Allow rewind to start of directory.
322 | 		f.off = 0
323 | 		return 0, nil
324 | 	}
325 | 	return 0, f.info.err("seek", fs.ErrInvalid)
326 | }
327 | 
328 | func (f *dirFile) ReadDir(n int) (list []fs.DirEntry, err error) {
329 | 	defer func() {
330 | 		if e := recover(); e != nil {
331 | 			list = nil
332 | 			err = fmt.Errorf("gitfs panic: %v\n%s", e, debug.Stack())
333 | 		}
334 | 	}()
335 | 
336 | 	for (n <= 0 || len(list) < n) && f.off < len(f.data) {
337 | 		e, size := parseDirEntry(f.data[f.off:])
338 | 		if size == 0 {
339 | 			break
340 | 		}
341 | 		f.off += size
342 | 		typ, data := f.s.object(e.hash)
343 | 		mode := fs.FileMode(0444)
344 | 		if typ == objTree {
345 | 			mode = fs.ModeDir | 0555
346 | 		}
347 | 		infoSize := int64(0)
348 | 		if typ == objBlob {
349 | 			infoSize = int64(len(data))
350 | 		}
351 | 		name := string(e.name)
352 | 		list = append(list, &fileInfo{name, name, mode, infoSize})
353 | 	}
354 | 	if len(list) == 0 && n > 0 {
355 | 		return list, io.EOF
356 | 	}
357 | 	return list, nil
358 | }
359 | 


--------------------------------------------------------------------------------