├── .gitignore ├── LICENSE ├── README.md ├── main.go ├── make-dist.bash └── test.bash /.gitignore: -------------------------------------------------------------------------------- 1 | /git-annex-remote-b2* 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Jack Christopher Kastorff 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Binaries are available in the [releases page](https://github.com/encryptio/git-annex-remote-b2/releases). 
You want to add the binary to your `$PATH`, either by creating a new directory for it (ideally inside your home directory, such as `~/bin`), or by putting the binary in a directory that is already in your `$PATH` (such as `/usr/local/bin`). 5 | 6 | To build from source, [set up a GOPATH](https://golang.org/doc/code.html) and then run `go get github.com/encryptio/git-annex-remote-b2`. 7 | 8 | Usage 9 | ===== 10 | 11 | After putting `git-annex-remote-b2` in your `$PATH`, use it like any other external remote: 12 | 13 | ``` 14 | ~/repo $ git annex initremote b2 type=external externaltype=b2 bucket=mydata 15 | ``` 16 | 17 | B2 credentials may either be given as arguments to `initremote` (`accountid=XXXX appkey=XXXXXXXXXXXXXXXX`) or as the environment variables `$B2_APP_KEY` and `$B2_ACCOUNT_ID`. If you pass them as arguments to `initremote`, the credentials will be stored in the git-annex repository and thus will be available to all clones of it. 18 | 19 | Optionally, you may pass `prefix=something` to have `git-annex-remote-b2` prepend `something/` to the keys it stores in B2. 20 | 21 | Reducing the financial cost of this remote 22 | ------------------------------------------- 23 | 24 | By default, all remotes are `semitrusted` in git-annex. This means that git-annex checks that the remote actually has the data before doing an operation whose safety depends on that copy existing (for example, a *local* `git annex drop`). If you tell git-annex that this remote won't lose data randomly by setting this remote's trust level higher, then those `checkpresentkey` calls (which turn into `ListFileNames` calls on B2) should go away. 25 | 26 | This is particularly important if you're under the free trial limits of B2. 27 | 28 | ``` 29 | ~/repo $ git annex trust b2 30 | ``` 31 | 32 | Secondly, `git-annex` will assume all non-local remotes have the same cost, and won't prefer one over the other by default. 
If you have a remote that doesn't cost as much as talking to B2, you should set the B2 remote's cost very high so that `git-annex` will prefer talking to the cheap remote rather than B2 when possible. (The default values are 100 for local remotes, and 200 for non-local remotes.) 33 | 34 | ``` 35 | ~/repo $ git config remote.b2.annex-cost 1000 36 | ``` 37 | 38 | Note that setting the `annex-cost` like this is a repo-local operation only; it does not apply to other clones of the repo you might have. 39 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha1" 6 | "encoding/hex" 7 | "errors" 8 | "fmt" 9 | "io" 10 | "os" 11 | "strings" 12 | "time" 13 | 14 | "github.com/encryptio/go-git-annex-external/external" 15 | "gopkg.in/kothar/go-backblaze.v0" 16 | ) 17 | 18 | type B2Ext struct { 19 | bucket *backblaze.Bucket 20 | prefix string 21 | 22 | lastList struct { 23 | setAt time.Time 24 | file string 25 | found bool 26 | id string 27 | } 28 | } 29 | 30 | func authenticate(e *external.External) (*backblaze.B2, error) { 31 | accountID, err := e.GetConfig("accountid") 32 | if err != nil { 33 | return nil, err 34 | } 35 | if accountID == "" { 36 | accountID = os.Getenv("B2_ACCOUNT_ID") 37 | } 38 | if accountID == "" { 39 | return nil, errors.New("You must set accountid to the backblaze account id") 40 | } 41 | 42 | appKey, err := e.GetConfig("appkey") 43 | if err != nil { 44 | return nil, err 45 | } 46 | if appKey == "" { 47 | appKey = os.Getenv("B2_APP_KEY") 48 | } 49 | if appKey == "" { 50 | return nil, errors.New("You must set appkey to the backblaze application key") 51 | } 52 | 53 | b2, err := backblaze.NewB2(backblaze.Credentials{ 54 | AccountID: accountID, 55 | ApplicationKey: appKey, 56 | }) 57 | if err != nil { 58 | return nil, fmt.Errorf("Couldn't authorize: %v", err) 59 | } 60 | 61 | 
return b2, nil 62 | } 63 | 64 | func getBucketConfig(e *external.External) (bucket string, prefix string, err error) { 65 | bucket, err = e.GetConfig("bucket") 66 | if err != nil { 67 | return "", "", err 68 | } 69 | if bucket == "" { 70 | return "", "", errors.New("You must set bucket to the bucket name") 71 | } 72 | 73 | prefix, err = e.GetConfig("prefix") 74 | // prefix == "" is ok. 75 | if prefix != "" && !strings.HasSuffix(prefix, "/") { 76 | prefix = prefix + "/" 77 | } 78 | 79 | return bucket, prefix, nil 80 | } 81 | 82 | func (be *B2Ext) listFileCached(file string) (found bool, fileID string, err error) { 83 | // Caching the last result of ListFileNames is no less safe than not caching 84 | // it; the race condition of two concurrent git annex copy --to b2 processes 85 | // sending the same file can result in a file with two identical versions in 86 | // both cases. 87 | // 88 | // However, caching this reduces the number of ListFileNames to half of what 89 | // it is during uploads (since git-annex always calls checkpresent which 90 | // uses ListFileNames before uploading, but when uploading we also do 91 | // upload elision by calling ListFileNames.) 
92 | 93 | if be.lastList.file != file || time.Since(be.lastList.setAt) > time.Second*15 { 94 | res, err := be.bucket.ListFileNames(file, 1) 95 | if err != nil { 96 | return false, "", err 97 | } 98 | 99 | be.lastList.setAt = time.Now() 100 | if len(res.Files) == 0 || res.Files[0].Name != file { 101 | be.lastList.file = file 102 | be.lastList.found = false 103 | be.lastList.id = "" 104 | } else { 105 | be.lastList.file = file 106 | be.lastList.found = true 107 | be.lastList.id = res.Files[0].ID 108 | } 109 | } 110 | 111 | return be.lastList.found, be.lastList.id, nil 112 | } 113 | 114 | func (be *B2Ext) clearListFileCache() { 115 | be.lastList.setAt = time.Time{} 116 | be.lastList.file = "" 117 | be.lastList.found = false 118 | be.lastList.id = "" 119 | } 120 | 121 | func (be *B2Ext) setup(e *external.External, canCreateBucket bool) error { 122 | if be.bucket != nil { 123 | // already done! 124 | return nil 125 | } 126 | 127 | b2, err := authenticate(e) 128 | if err != nil { 129 | return err 130 | } 131 | 132 | bucketName, prefix, err := getBucketConfig(e) 133 | if err != nil { 134 | return err 135 | } 136 | 137 | bucket, err := b2.Bucket(bucketName) 138 | if err != nil { 139 | return fmt.Errorf("couldn't open bucket %#v: %v", bucketName, err) 140 | } 141 | 142 | if bucket == nil { 143 | if !canCreateBucket { 144 | return fmt.Errorf("bucket %#v does not exist anymore", bucketName) 145 | } 146 | 147 | fmt.Fprintf(os.Stderr, "Creating private B2 bucket %#v\n", bucketName) 148 | 149 | bucket, err = b2.CreateBucket(bucketName, backblaze.AllPrivate) 150 | if err != nil { 151 | return fmt.Errorf("couldn't create bucket %#v: %v", bucketName, err) 152 | } 153 | } 154 | 155 | be.bucket = bucket 156 | be.prefix = prefix 157 | 158 | return nil 159 | } 160 | 161 | func (be *B2Ext) InitRemote(e *external.External) error { 162 | return be.setup(e, true) 163 | } 164 | 165 | func (be *B2Ext) Prepare(e *external.External) error { 166 | return be.setup(e, false) 167 | } 168 | 169 | 
func (be *B2Ext) Store(e *external.External, key, file string) error { 170 | fh, err := os.Open(file) 171 | if err != nil { 172 | return err 173 | } 174 | defer fh.Close() 175 | 176 | shaReady := make(chan struct{}) 177 | var haveSHA []byte 178 | var contentLength int64 179 | var shaError error 180 | go func() { 181 | defer close(shaReady) 182 | 183 | sha := sha1.New() 184 | contentLength, shaError = io.Copy(sha, fh) 185 | if shaError != nil { 186 | return 187 | } 188 | 189 | haveSHA = sha.Sum(nil) 190 | 191 | _, shaError = fh.Seek(0, 0) 192 | }() 193 | 194 | found, fileID, err := be.listFileCached(be.prefix + key) 195 | if err != nil { 196 | return fmt.Errorf("couldn't list filenames: %v", err) 197 | } 198 | 199 | if found { 200 | // file probably already stored; make sure using the SHA1 201 | b2file, err := be.bucket.GetFileInfo(fileID) 202 | if err != nil { 203 | return fmt.Errorf("couldn't get file info for %#v: %v", fileID, err) 204 | } 205 | if b2file != nil { 206 | <-shaReady 207 | 208 | wantSHA, err := hex.DecodeString(b2file.ContentSha1) 209 | if err == nil && bytes.Equal(haveSHA, wantSHA) { 210 | // File already exists with correct data. 211 | return nil 212 | } 213 | 214 | // File exists but is the incorrect data. Delete the old version 215 | // first; B2 will keep the old version around otherwise. 
216 | _, err = be.bucket.DeleteFileVersion(be.prefix+key, b2file.ID) 217 | if err != nil { 218 | return fmt.Errorf("couldn't delete old file version: %v", err) 219 | } 220 | } 221 | } 222 | 223 | <-shaReady 224 | if shaError != nil { 225 | return fmt.Errorf("couldn't hash local file %v: %v", file, shaError) 226 | } 227 | 228 | _, err = be.bucket.UploadHashedFile( 229 | be.prefix+key, 230 | nil, 231 | external.NewProgressReader(fh, e), 232 | hex.EncodeToString(haveSHA), 233 | contentLength) 234 | 235 | be.clearListFileCache() 236 | 237 | if err != nil { 238 | return fmt.Errorf("couldn't upload file: %v", err) 239 | } 240 | 241 | return nil 242 | } 243 | 244 | func (be *B2Ext) Retrieve(e *external.External, key, file string) error { 245 | fh, err := os.Create(file) 246 | if err != nil { 247 | return fmt.Errorf("couldn't open %v for writing: %v", file, err) 248 | } 249 | defer fh.Close() 250 | 251 | _, rc, err := be.bucket.DownloadFileByName(be.prefix + key) 252 | if rc != nil { 253 | defer rc.Close() 254 | } 255 | if err != nil { 256 | return err 257 | } 258 | 259 | _, err = io.Copy(fh, external.NewProgressReader(rc, e)) 260 | if err != nil { 261 | return err 262 | } 263 | 264 | return nil 265 | } 266 | 267 | func (be *B2Ext) CheckPresent(e *external.External, key string) (bool, error) { 268 | found, _, err := be.listFileCached(be.prefix + key) 269 | if err != nil { 270 | return false, fmt.Errorf("couldn't list filenames: %v", err) 271 | } 272 | 273 | return found, nil 274 | } 275 | 276 | func (be *B2Ext) Remove(e *external.External, key string) error { 277 | found, fileID, err := be.listFileCached(be.prefix + key) 278 | if err != nil { 279 | return fmt.Errorf("couldn't list filenames: %v", err) 280 | } 281 | 282 | if !found { 283 | // File already non-existent, nothing to remove 284 | return nil 285 | } 286 | 287 | _, err = be.bucket.DeleteFileVersion(be.prefix+key, fileID) 288 | be.clearListFileCache() 289 | if err != nil { 290 | return fmt.Errorf("couldn't delete 
file version: %v", err) 291 | } 292 | 293 | return nil 294 | } 295 | 296 | func (be *B2Ext) GetCost(e *external.External) (int, error) { 297 | return 0, external.ErrUnsupportedRequest 298 | } 299 | 300 | func (be *B2Ext) GetAvailability(e *external.External) (external.Availability, error) { 301 | return external.AvailabilityGlobal, nil 302 | } 303 | 304 | func (be *B2Ext) WhereIs(e *external.External, key string) (string, error) { 305 | return "", external.ErrUnsupportedRequest 306 | } 307 | 308 | func main() { 309 | h := &B2Ext{} 310 | 311 | var ( 312 | in io.Reader = os.Stdin 313 | out io.Writer = os.Stdout 314 | ) 315 | 316 | if os.Getenv("GIT_ANNEX_EXTERNAL_B2_PROTOCOL_DEBUG") != "" { 317 | fmt.Fprintf(os.Stderr, "git-annex-remote-b2: enabling protocol debug logging\n") 318 | in = io.TeeReader(in, os.Stderr) 319 | out = io.MultiWriter(out, os.Stderr) 320 | } 321 | 322 | err := external.RunLoop(in, out, h) 323 | if err != nil { 324 | fmt.Fprintf(os.Stderr, "Error: %v\n", err) 325 | os.Exit(1) 326 | } 327 | 328 | os.Exit(0) 329 | } 330 | -------------------------------------------------------------------------------- /make-dist.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | for GOOS in darwin linux; do 5 | for GOARCH in 386 amd64; do 6 | export GOOS 7 | export GOARCH 8 | 9 | DIR="git-annex-remote-b2.$GOOS-$GOARCH" 10 | echo "Creating ${DIR}.tar.gz" 11 | rm -rf "$DIR" 12 | mkdir "$DIR" 13 | 14 | go build -o "$DIR/git-annex-remote-b2" 15 | cp README.md LICENSE "$DIR/" 16 | 17 | rm -f "$DIR".tar.gz 18 | tar -czf "$DIR".tar.gz "$DIR" 19 | rm -rf "$DIR" 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /test.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [ -z "$B2_APP_KEY" ]; then 5 | echo "Set \$B2_APP_KEY" 6 | exit 1 7 | fi 8 | 9 | if [ -z "$B2_ACCOUNT_ID" ]; 
then 10 | echo "Set \$B2_ACCOUNT_ID" 11 | exit 1 12 | fi 13 | 14 | DIR="$(pwd)/integration-test" 15 | 16 | if [ -e "$DIR" ]; then 17 | chmod -R a+w "$DIR" 18 | rm -rf "$DIR" 19 | fi 20 | 21 | mkdir "$DIR" 22 | mkdir "$DIR/bin" 23 | 24 | go build -o "$DIR/bin/git-annex-remote-b2" 25 | export PATH="$DIR/bin:$PATH" 26 | 27 | pushd "$DIR" 28 | git init 29 | git annex init 30 | 31 | BUCKET_NAME="git-annex-test-$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1)" 32 | 33 | git annex initremote noencrypt type=external externaltype=b2 encryption=none bucket="$BUCKET_NAME" prefix=raw 34 | git annex initremote --fast encrypt type=external externaltype=b2 encryption=shared bucket="$BUCKET_NAME" prefix=enc 35 | 36 | cp bin/git-annex-remote-b2 somefile 37 | git annex add somefile 38 | git commit -m 'commit' 39 | 40 | git annex copy --to noencrypt 41 | git annex fsck --from noencrypt 42 | git annex drop 43 | git annex move --from noencrypt 44 | git annex fsck --from noencrypt 45 | 46 | git annex copy --to encrypt 47 | git annex fsck --from encrypt 48 | git annex drop 49 | git annex move --from encrypt 50 | git annex fsck --from encrypt 51 | 52 | git annex testremote --fast encrypt 53 | git annex testremote --fast noencrypt 54 | 55 | popd 56 | chmod -R a+w "$DIR" 57 | rm -rf "$DIR" 58 | 59 | echo "Passed!" 60 | exit 0 61 | 62 | --------------------------------------------------------------------------------