├── MAINTAINERS ├── _testdata ├── regression.yml ├── server.pem └── server.key ├── cmd └── gitcollector │ ├── main.go │ └── subcmd │ └── download.go ├── generate-certs.sh ├── Makefile ├── .gitignore ├── job.go ├── Dockerfile ├── .travis.yml ├── library ├── utils.go ├── job_test.go └── job.go ├── .github └── workflows │ └── push_pull.yml ├── downloader ├── testhelper │ ├── testhelper.go │ └── brokenfs.go ├── download.go ├── git.go └── download_test.go ├── provider ├── github_test.go ├── github.go ├── updates_test.go └── updates.go ├── worker.go ├── worker_pool_test.go ├── go.mod ├── Jenkinsfile ├── metrics ├── metrics_db.go ├── metrics_test.go └── metrics.go ├── scheduler.go ├── updater ├── update_test.go └── update.go ├── integration ├── postgres_test.go └── helper.go ├── worker_pool.go ├── discovery ├── github-iterator.go ├── github.go └── github_test.go ├── README.md ├── testutils └── proxy.go ├── go.sum └── LICENSE /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Manuel Carmona (@mcarmonaa) 2 | -------------------------------------------------------------------------------- /_testdata/regression.yml: -------------------------------------------------------------------------------- 1 | # TODO(@lwsanty): change to repos from infra #1130 2 | # regression config represents the array of lists of github organizations 3 | - bblfsh,git-fixtures -------------------------------------------------------------------------------- /cmd/gitcollector/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/src-d/gitcollector/cmd/gitcollector/subcmd" 5 | "gopkg.in/src-d/go-cli.v0" 6 | ) 7 | 8 | var ( 9 | version string 10 | build string 11 | ) 12 | 13 | var app = cli.New("gitcollector", version, build, "source{d} tool to download repositories into siva files") 14 | 15 | func main() { 16 | app.AddCommand(&subcmd.DownloadCmd{}) 17 | app.RunMain() 18 | 
} 19 | -------------------------------------------------------------------------------- /generate-certs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | case `uname -s` in 3 | Linux*) sslConfig=/etc/ssl/openssl.cnf;; 4 | Darwin*) sslConfig=/System/Library/OpenSSL/openssl.cnf;; 5 | esac 6 | openssl req \ 7 | -newkey rsa:2048 \ 8 | -x509 \ 9 | -nodes \ 10 | -keyout _testdata/server.key \ 11 | -new \ 12 | -out _testdata/server.crt \ 13 | -subj /CN=localhost \ 14 | -reqexts SAN \ 15 | -extensions SAN \ 16 | -config <(cat $sslConfig \ 17 | <(printf '[SAN]\nsubjectAltName=DNS:localhost')) \ 18 | -sha256 \ 19 | -days 3650 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Package configuration 2 | PROJECT = gitcollector 3 | COMMANDS = cmd/gitcollector 4 | 5 | # If you need to build more than one dockerfile, you can do so like this: 6 | # DOCKERFILES = Dockerfile_filename1:repositoryname1 Dockerfile_filename2:repositoryname2 ... 
7 | 8 | # Including ci Makefile 9 | CI_REPOSITORY ?= https://github.com/src-d/ci.git 10 | CI_BRANCH ?= v1 11 | CI_PATH ?= .ci 12 | MAKEFILE := $(CI_PATH)/Makefile.main 13 | TEST_RACE ?= true 14 | $(MAKEFILE): 15 | git clone --quiet --depth 1 -b $(CI_BRANCH) $(CI_REPOSITORY) $(CI_PATH); 16 | -include $(MAKEFILE) 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | # Output of the go coverage tool, specifically when used with LiteIDE 27 | *.out 28 | 29 | # CI artefacts 30 | .ci/ 31 | coverage.txt 32 | 33 | # Built binaries 34 | bin/ 35 | build/ 36 | 37 | # Dependencies 38 | vendor/ 39 | 40 | -------------------------------------------------------------------------------- /job.go: -------------------------------------------------------------------------------- 1 | package gitcollector 2 | 3 | import ( 4 | "context" 5 | ) 6 | 7 | // Job represents a gitcollector task. 8 | type Job interface { 9 | // Process perform the necessary work on the job. 10 | Process(context.Context) error 11 | } 12 | 13 | // MetricsCollector represents a component in charge to collect jobs metrics. 14 | type MetricsCollector interface { 15 | // Start starts collecting metrics. 16 | Start() 17 | // Stop stops collectingMetrincs. 18 | Stop(immediate bool) 19 | // Success registers metrics about successfully processed Job. 20 | Success(Job) 21 | // Faile register metrics about a failed processed Job. 22 | Fail(Job) 23 | // Discover register metrics about a discovered Job. 
24 | Discover(Job) 25 | } 26 | 27 | // Provider interface represents a service to generate new Jobs. 28 | type Provider interface { 29 | Start() error 30 | Stop() error 31 | } 32 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | #============================ 2 | # Stage 1: build gitcollector 3 | #============================ 4 | FROM golang:1.12.6-alpine3.9 AS builder 5 | 6 | ENV GO111MODULE=on 7 | 8 | COPY . /gitcollector 9 | 10 | WORKDIR /gitcollector/cmd/gitcollector 11 | 12 | RUN apk add --no-cache dumb-init=1.2.2-r1 git && go build -o /bin/gitcollector 13 | 14 | #=================================================== 15 | # Stage 2: copy binary and set environment variables 16 | #=================================================== 17 | FROM alpine:3.9.4 18 | 19 | COPY --from=builder /bin/gitcollector /usr/bin/dumb-init /bin/ 20 | 21 | RUN apk add --no-cache ca-certificates 22 | 23 | # volume where the repositories will be downloaded 24 | VOLUME ["/library"] 25 | 26 | ENV GITCOLLECTOR_LIBRARY=/library 27 | 28 | WORKDIR /library 29 | ENTRYPOINT ["/bin/dumb-init", "--"] 30 | CMD ["gitcollector", "download"] 31 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 1.13.x 4 | go_import_path: github.com/src-d/gitcollector 5 | 6 | matrix: 7 | fast_finish: true 8 | 9 | services: 10 | - docker 11 | 12 | script: 13 | - make test 14 | 15 | jobs: 16 | include: 17 | - os: linux 18 | sudo: required 19 | dist: bionic 20 | 21 | before_deploy: 22 | - make packages 23 | 24 | deploy: 25 | provider: releases 26 | api_key: $GITHUB_TOKEN 27 | file_glob: true 28 | file: build/*linux_amd64.tar.gz 29 | skip_cleanup: true 30 | on: 31 | tags: true 32 | 33 | after_deploy: 34 | - make docker-push-latest-release 
35 | 36 | - os: osx 37 | osx_image: xcode10.2 38 | 39 | before_deploy: 40 | - make packages 41 | 42 | deploy: 43 | provider: releases 44 | api_key: $GITHUB_TOKEN 45 | file_glob: true 46 | file: build/*darwin_amd64.tar.gz 47 | skip_cleanup: true 48 | on: 49 | tags: true 50 | -------------------------------------------------------------------------------- /library/utils.go: -------------------------------------------------------------------------------- 1 | package library 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/src-d/go-borges" 7 | "gopkg.in/src-d/go-errors.v1" 8 | ) 9 | 10 | var ( 11 | // ErrNotSivaLibrary is returned when a borges.Library is not a 12 | // siva.Library 13 | ErrNotSivaLibrary = errors.NewKind("not siva library found") 14 | 15 | // ErrNotSivaLocation is returned when a borges.Library is no a 16 | // siva.Location 17 | ErrNotSivaLocation = errors.NewKind("not siva location found") 18 | ) 19 | 20 | // NewRepositoryID builds a borges.RepositoryID from the given endpoint. 21 | func NewRepositoryID(endpoint string) (borges.RepositoryID, error) { 22 | id, err := borges.NewRepositoryID(endpoint) 23 | if err != nil { 24 | return "", err 25 | } 26 | 27 | return borges.RepositoryID(strings.TrimSuffix(id.String(), ".git")), nil 28 | } 29 | 30 | // GetOrgFromEndpoint retrieve the organization from an endpoint. 
31 | func GetOrgFromEndpoint(endpoint string) string { 32 | id, _ := NewRepositoryID(endpoint) 33 | org := strings.Split(id.String(), "/")[1] 34 | return strings.ToLower(org) 35 | } 36 | -------------------------------------------------------------------------------- /_testdata/server.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIICzjCCAbagAwIBAgIUShAAlHP7BagLuY+xCv3a5rnhvjQwDQYJKoZIhvcNAQEL 3 | BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MB4XDTE5MDkyNDEwMzMwMFoXDTI5MDky 4 | MTEwMzMwMFowFDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEF 5 | AAOCAQ8AMIIBCgKCAQEAyOFiQuC2eVR7MRBFRuiG7MqpR8JnB+V/csEQHNIi1v+a 6 | 85BeRRuYOnGjg5Uqet3uNLegXOOV1HcN5/qPDb/8HRJIqM4UTnGuJjxqVe9bg0CW 7 | ja1e7Kfsn5UGupGK9lom4ZPAWgSRHjX3RGwABeIWceijS0SDKXxnopjUalXwPkDX 8 | 4cD+w/bxvK6sSVvWo+mCsYQaJ7xrpmOpGaeLKsRB3W/8k0x0d9vT7AW0+zyfWYnf 9 | U1MtQ7bYsmdlImDxWmBhOWGIjDqpvLN2izK/FqhgrcKnYNWI3lfR7fR6Qy4EwYIr 10 | 8F0OW96KG1KW6xpJsedh84wdR5G8VKStjSheLsf06QIDAQABoxgwFjAUBgNVHREE 11 | DTALgglsb2NhbGhvc3QwDQYJKoZIhvcNAQELBQADggEBALaAuARTKmRCeBgCae98 12 | 2jgepX1dxtEL0oHWBUNW28pZ0Oko+yFBVZzIDlmKTNT0qVgNgCfwZx3werNG7T9h 13 | x3annlQ0MGMGWqGi2kHif5VP/Tw7vHDo8BtIeN16BDhZ/5HtSOTYa9ZrD3lHVI1O 14 | ATE/gw7gC0OhPEvDYs+ob9iQ6exaHIOJwjo0XrRiUOyOr7dt31y2Zj53jxOHdNIJ 15 | oOqU7JzixJcNDb9wSk4rUURLQLyZrjQiFrhYiLPC0PLEkRSJxgnqMIf6AhGaub3S 16 | boINjFa12llUn04Gw3TB7xNGmoevdhZ0WwpFajepepTc+VqXs24PehwGvWWt9EJ+ 17 | zg8= 18 | -----END CERTIFICATE----- 19 | -------------------------------------------------------------------------------- /.github/workflows/push_pull.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | on: [push, pull_request] 3 | jobs: 4 | 5 | build: 6 | name: Test on go ${{ matrix.go_version }} and ${{ matrix.os }} 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | go_version: [1.13] 11 | os: [ubuntu-latest, macOS-latest] 12 | 13 | steps: 14 | - name: Set up Go ${{ 
matrix.go_version }} 15 | uses: actions/setup-go@v1 16 | with: 17 | go-version: ${{ matrix.go_version }} 18 | id: go 19 | 20 | - name: Check out code into the Go module directory 21 | uses: actions/checkout@v1 22 | 23 | - name: Test 24 | run: | 25 | make test-coverage 26 | env: 27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 28 | 29 | - name: Upload coverage 30 | run: | 31 | os=os_$(echo ${{ matrix.os }} | tr '[:punct:]' '_') 32 | go=go_$(echo ${{ matrix.go_version }} | tr '[:punct:]' '_') 33 | bash <(curl -s https://codecov.io/bash) -f coverage.txt -cF $os,$go 34 | env: 35 | CODECOV_TOKEN: be731f61-6ca9-42b0-8f1a-59f4a0b28c0d 36 | 37 | - name: Test races 38 | run: | 39 | make test-race 40 | env: 41 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 42 | 43 | 44 | -------------------------------------------------------------------------------- /downloader/testhelper/testhelper.go: -------------------------------------------------------------------------------- 1 | package testhelper 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "path/filepath" 7 | 8 | "github.com/src-d/go-borges/siva" 9 | "gopkg.in/src-d/go-billy.v4" 10 | "gopkg.in/src-d/go-billy.v4/osfs" 11 | ) 12 | 13 | // Helper is a struct that's used to simplify preparation and execution of download tests 14 | type Helper struct { 15 | Dir string 16 | Siva string 17 | FS billy.Filesystem 18 | TempFS billy.Filesystem 19 | Lib *siva.Library 20 | } 21 | 22 | // NewHelper is Helper's constructor 23 | func NewHelper() (*Helper, func(), error) { 24 | dir, err := ioutil.TempDir("", "gitcollector") 25 | if err != nil { 26 | return nil, func() {}, err 27 | } 28 | closer := func() { os.RemoveAll(dir) } 29 | 30 | sivaPath := filepath.Join(dir, "Siva") 31 | if err := os.Mkdir(sivaPath, 0775); err != nil { 32 | return nil, closer, err 33 | 34 | } 35 | 36 | downloaderPath := filepath.Join(dir, "downloader") 37 | if err := os.Mkdir(downloaderPath, 0775); err != nil { 38 | return nil, closer, err 39 | } 40 | 41 | fs := 
osfs.New(sivaPath) 42 | lib, err := siva.NewLibrary("test", fs, &siva.LibraryOptions{ 43 | Bucket: 2, 44 | Transactional: true, 45 | }) 46 | if err != nil { 47 | return nil, closer, err 48 | } 49 | 50 | return &Helper{ 51 | Dir: dir, 52 | Siva: sivaPath, 53 | FS: fs, 54 | TempFS: osfs.New(downloaderPath), 55 | Lib: lib, 56 | }, closer, nil 57 | } 58 | -------------------------------------------------------------------------------- /provider/github_test.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | "time" 7 | 8 | "github.com/src-d/gitcollector" 9 | "github.com/src-d/gitcollector/discovery" 10 | "github.com/src-d/gitcollector/library" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestGitHub(t *testing.T) { 15 | var req = require.New(t) 16 | 17 | const ( 18 | org = "src-d" 19 | timeToStop = 5 * time.Second 20 | ) 21 | 22 | queue := make(chan gitcollector.Job, 50) 23 | provider := NewGitHubOrg( 24 | org, 25 | []string{}, 26 | "", 27 | queue, 28 | &discovery.GitHubOpts{ 29 | MaxJobBuffer: 50, 30 | }, 31 | ) 32 | 33 | var ( 34 | consumedJobs = make(chan gitcollector.Job, 200) 35 | stop bool 36 | done = make(chan struct{}) 37 | ) 38 | 39 | go func() { 40 | defer func() { done <- struct{}{} }() 41 | for !stop { 42 | select { 43 | case job, ok := <-queue: 44 | if !ok { 45 | return 46 | } 47 | 48 | select { 49 | case consumedJobs <- job: 50 | case <-time.After(timeToStop): 51 | stop = true 52 | } 53 | } 54 | } 55 | }() 56 | 57 | err := provider.Start() 58 | req.True(discovery.ErrDiscoveryStopped.Is(err)) 59 | 60 | close(queue) 61 | <-done 62 | req.False(stop) 63 | close(consumedJobs) 64 | 65 | for j := range consumedJobs { 66 | job, ok := j.(*library.Job) 67 | req.True(ok) 68 | req.True(job.Type == library.JobDownload) 69 | req.Len(job.Endpoints(), 1) 70 | req.True(strings.Contains(job.Endpoints()[0], org)) 71 | } 72 | } 73 | 
-------------------------------------------------------------------------------- /provider/github.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/src-d/gitcollector" 7 | "github.com/src-d/gitcollector/discovery" 8 | "github.com/src-d/gitcollector/library" 9 | 10 | "github.com/google/go-github/v28/github" 11 | ) 12 | 13 | // NewGitHubOrg builds a new gitcollector.Provider 14 | // based on a discovery.Github. 15 | func NewGitHubOrg( 16 | org string, 17 | excludedRepos []string, 18 | authToken string, 19 | queue chan<- gitcollector.Job, 20 | opts *discovery.GitHubOpts, 21 | ) *discovery.GitHub { 22 | return discovery.NewGitHub( 23 | AdvertiseGHRepositoriesOnJobQueue(queue), 24 | discovery.NewGHOrgReposIter(org, excludedRepos, &discovery.GHReposIterOpts{ 25 | AuthToken: authToken, 26 | }), 27 | opts, 28 | ) 29 | } 30 | 31 | // AdvertiseGHRepositoriesOnJobQueue sends the discovered repositories as a 32 | // gitcollector.Jobs to the given channel. It makes a discovery.GitHub plays 33 | // as a gitcollector.Provider 34 | func AdvertiseGHRepositoriesOnJobQueue( 35 | queue chan<- gitcollector.Job, 36 | ) discovery.AdvertiseGHRepositoriesFn { 37 | return func(ctx context.Context, repos []*github.Repository) error { 38 | for _, repo := range repos { 39 | endpoint, err := discovery.GetGHEndpoint(repo) 40 | if err != nil { 41 | return nil 42 | } 43 | 44 | job := &library.Job{ 45 | Type: library.JobDownload, 46 | } 47 | job.SetEndpoints([]string{endpoint}) 48 | 49 | select { 50 | case queue <- job: 51 | case <-ctx.Done(): 52 | return discovery.ErrAdvertiseTimeout. 
53 | Wrap(ctx.Err()) 54 | } 55 | } 56 | 57 | return nil 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /_testdata/server.key: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDI4WJC4LZ5VHsx 3 | EEVG6IbsyqlHwmcH5X9ywRAc0iLW/5rzkF5FG5g6caODlSp63e40t6Bc45XUdw3n 4 | +o8Nv/wdEkiozhROca4mPGpV71uDQJaNrV7sp+yflQa6kYr2Wibhk8BaBJEeNfdE 5 | bAAF4hZx6KNLRIMpfGeimNRqVfA+QNfhwP7D9vG8rqxJW9aj6YKxhBonvGumY6kZ 6 | p4sqxEHdb/yTTHR329PsBbT7PJ9Zid9TUy1DttiyZ2UiYPFaYGE5YYiMOqm8s3aL 7 | Mr8WqGCtwqdg1YjeV9Ht9HpDLgTBgivwXQ5b3oobUpbrGkmx52HzjB1HkbxUpK2N 8 | KF4ux/TpAgMBAAECggEAeZwQe4JElaEyTxqVhf7hJ9rsrjefXiVuiY8dbQU9+ECX 9 | ktoUZ+vztXX0UoVsSSOvwJRl7bYNj6lfbs+96MoQG51lBWR04KRrlFRA37NfCE+C 10 | lVSewdBLLLTwdOQFZ2xeW3OyIAzhuZNcFTz4qpcblWwL51YbvXLF3qZ8+b77CQPv 11 | UoRNtXeGS/cB3rFj8L4HfSiRs6TumXZS8RCQPmAhn83qmVfxkF4wdHtxnUDNNTRs 12 | jBWgtkCLWoCJcc/KHqjOJF81u2UdWW6hl2ZB+CmlLZbvxULFYD2+folIR/whwgnV 13 | sP+8p/aQGo60udLZUz2f/+9WW0POuZfXwzqVlMP+MQKBgQDxXEranOlI9eOT6dWn 14 | V2O4quV13iWgiTxyxv1/bWIQKH9N0VD8M7cTomcs/ryLawItvWDC41SOz9QSrKis 15 | PPTXyJKBPaZOp4j0EIDIzlla0vCBdAIbxI7G0nFLTcElqgziQWDLepOjGH4u6bsd 16 | ZlMYsOi5j3fRGw3q0fqhFoMZ7QKBgQDVEIocKnpkAOBJOfTaSY+nR0DWWBBu13YE 17 | /4CKt6u2jmPPiM3JhqnILK/uydBWLB1IKaedJsMadZ0zfGcrfUwR1lx1ZppWvj01 18 | avIdutkihXlSXJdgM8ZhV6M4jT3i9J4ggVaSVk/3lIHiZePxkjUzoM8pMRbxwJHr 19 | RxqOi3Q3bQKBgQDLYiMtjpXCRnmMNbo1Nh2xCkkrhpOccKiAa5BHpavBw+5qa5zb 20 | uNRn9i0NLGdC8Fw4oOF9uESTxyuExPupkAAeKUTTJVkT5HcyewmarM99Ck1iOJhi 21 | P8K0ZbbUZuQAUghjTAY90L/B6qzB6XEkeApDvyeM3glwKmUS3/hbb/XvjQKBgEM5 22 | srnsVS2UcS/pj24IY8D95GE87AJtDRnKm/4osY8qIifrHDMHwC4em2wWVRu7U4nf 23 | Srl07347udvy1oKMSCIgpo+MTgosgBeIpNfoinYv8vWfrUt3SpKQGjnPpsUjNXlA 24 | FdaaUSTmwIx0ZZVk8lBV3sc6a+jfP6sihiiPq52JAoGBAOHYF7nbkJUbiyBVdS6n 25 | ER9lUbim6ViDqUjsjKIHHphbIs9Rh+GrlA4WPVcHVZmhXIDSmon67fxAclTjGI16 26 | 
enPrTpk+oMDPFkruoUme72JPlOvRFhpLeiBfzMffPuKEqObXYnyxGcP7ya3IAY3R 27 | 5k3GLdCvSDvDsHza2OxVErD1 28 | -----END PRIVATE KEY----- 29 | -------------------------------------------------------------------------------- /worker.go: -------------------------------------------------------------------------------- 1 | package gitcollector 2 | 3 | import ( 4 | "context" 5 | 6 | "gopkg.in/src-d/go-errors.v1" 7 | ) 8 | 9 | type worker struct { 10 | id string 11 | jobs chan Job 12 | cancel chan bool 13 | stopped bool 14 | metrics MetricsCollector 15 | } 16 | 17 | func newWorker(jobs chan Job, metrics MetricsCollector) *worker { 18 | return &worker{ 19 | jobs: jobs, 20 | cancel: make(chan bool), 21 | metrics: metrics, 22 | } 23 | } 24 | 25 | var ( 26 | errJobsClosed = errors.NewKind("jobs channel was closed") 27 | errWorkerStopped = errors.NewKind("worker was stopped") 28 | ) 29 | 30 | func (w *worker) start() { 31 | // It shouldn't be restarted after a call to stop. 32 | if w.stopped { 33 | return 34 | } 35 | 36 | ctx, cancel := context.WithCancel(context.Background()) 37 | defer cancel() 38 | for { 39 | if err := w.consumeJob(ctx); err != nil { 40 | if errJobsClosed.Is(err) || errWorkerStopped.Is(err) { 41 | close(w.cancel) 42 | } 43 | 44 | return 45 | } 46 | } 47 | } 48 | 49 | func (w *worker) consumeJob(ctx context.Context) error { 50 | select { 51 | case <-w.cancel: 52 | return errWorkerStopped.New() 53 | case job, ok := <-w.jobs: 54 | if !ok { 55 | return errJobsClosed.New() 56 | } 57 | 58 | var done = make(chan struct{}) 59 | go func() { 60 | defer close(done) 61 | if err := job.Process(ctx); err != nil { 62 | w.metrics.Fail(job) 63 | return 64 | } 65 | 66 | w.metrics.Success(job) 67 | }() 68 | 69 | select { 70 | case now := <-w.cancel: 71 | if !now { 72 | <-done 73 | } 74 | 75 | return errWorkerStopped.New() 76 | case <-done: 77 | return nil 78 | } 79 | } 80 | } 81 | 82 | func (w *worker) stop(immediate bool) { 83 | if w.stopped { 84 | return 85 | } 86 | 87 | w.cancel <- 
immediate 88 | w.stopped = true 89 | } 90 | -------------------------------------------------------------------------------- /worker_pool_test.go: -------------------------------------------------------------------------------- 1 | package gitcollector 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestWorkerPool(t *testing.T) { 13 | var require = require.New(t) 14 | require.True(true) 15 | 16 | queue := make(chan Job, 20) 17 | wp := NewWorkerPool(testScheduleFn(queue), &WorkerPoolOpts{}) 18 | 19 | numWorkers := []int{2, 8, 0} 20 | for _, n := range numWorkers { 21 | wp.SetWorkers(n) 22 | require.Equal(wp.Size(), n) 23 | } 24 | 25 | var ( 26 | ids = []string{ 27 | "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", 28 | } 29 | 30 | mu sync.Mutex 31 | got []string 32 | process = func(id string) error { 33 | mu.Lock() 34 | defer mu.Unlock() 35 | 36 | got = append(got, id) 37 | return nil 38 | } 39 | ) 40 | 41 | wp.SetWorkers(10) 42 | wp.Run() 43 | 44 | for _, id := range ids { 45 | queue <- &testJob{ 46 | id: id, 47 | process: process, 48 | } 49 | } 50 | close(queue) 51 | 52 | wp.Wait() 53 | require.ElementsMatch(ids, got) 54 | 55 | queue = make(chan Job, 20) 56 | wp = NewWorkerPool(testScheduleFn(queue), &WorkerPoolOpts{}) 57 | 58 | wp.SetWorkers(20) 59 | wp.Run() 60 | 61 | for range ids { 62 | queue <- &testJob{} 63 | } 64 | 65 | wp.Stop() 66 | require.Len(wp.workers, 0) 67 | } 68 | 69 | type testJob struct { 70 | id string 71 | process func(id string) error 72 | } 73 | 74 | var _ Job = (*testJob)(nil) 75 | 76 | func (j *testJob) Process(_ context.Context) error { 77 | if j.process == nil { 78 | return nil 79 | } 80 | 81 | return j.process(j.id) 82 | } 83 | 84 | func testScheduleFn(queue chan Job) JobScheduleFn { 85 | return func(_ context.Context) (Job, error) { 86 | select { 87 | case job, ok := <-queue: 88 | if !ok { 89 | return nil, ErrJobSource.New() 90 | } 91 | 92 | return 
job, nil 93 | case <-time.After(50 * time.Millisecond): 94 | return nil, ErrNewJobsNotFound.New() 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /downloader/testhelper/brokenfs.go: -------------------------------------------------------------------------------- 1 | package testhelper 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "gopkg.in/src-d/go-billy.v4" 8 | ) 9 | 10 | // ErrBrokenFS is a default error that will be returned if some operation is mocked 11 | var ErrBrokenFS = fmt.Errorf("mocked") 12 | 13 | // BrokenFSOptions contains list of operations that will be broken in the scope of this mock up 14 | type BrokenFSOptions struct { 15 | FailedOpen bool 16 | FailedOpenFile bool 17 | FailedStat bool 18 | FailedChroot bool 19 | FailedCreate bool 20 | FailedTempFile bool 21 | } 22 | 23 | // BrokenFS is a simple billy.Filesystem mockup 24 | type BrokenFS struct { 25 | billy.Filesystem 26 | opts BrokenFSOptions 27 | } 28 | 29 | // NewBrokenFS is BrokenFS constructor 30 | func NewBrokenFS(fs billy.Filesystem, opts BrokenFSOptions) *BrokenFS { 31 | return &BrokenFS{Filesystem: fs, opts: opts} 32 | } 33 | 34 | func (fs *BrokenFS) Open(filename string) (billy.File, error) { 35 | if fs.opts.FailedOpen { 36 | return nil, ErrBrokenFS 37 | } 38 | return fs.Filesystem.Open(filename) 39 | } 40 | 41 | func (fs *BrokenFS) OpenFile(filename string, flag int, perm os.FileMode) (billy.File, error) { 42 | if fs.opts.FailedOpenFile { 43 | return nil, ErrBrokenFS 44 | } 45 | return fs.Filesystem.OpenFile(filename, flag, perm) 46 | } 47 | 48 | func (fs *BrokenFS) Stat(filename string) (os.FileInfo, error) { 49 | if fs.opts.FailedStat { 50 | return nil, ErrBrokenFS 51 | } 52 | return fs.Filesystem.Stat(filename) 53 | } 54 | 55 | func (fs *BrokenFS) Chroot(path string) (billy.Filesystem, error) { 56 | if fs.opts.FailedChroot { 57 | return nil, ErrBrokenFS 58 | } 59 | newFS, err := fs.Filesystem.Chroot(path) 60 | return 
NewBrokenFS(newFS, fs.opts), err 61 | } 62 | 63 | func (fs *BrokenFS) Create(filename string) (billy.File, error) { 64 | if fs.opts.FailedCreate { 65 | return nil, ErrBrokenFS 66 | } 67 | return fs.Filesystem.Create(filename) 68 | } 69 | 70 | func (fs *BrokenFS) TempFile(dir, prefix string) (billy.File, error) { 71 | if fs.opts.FailedTempFile { 72 | return nil, ErrBrokenFS 73 | } 74 | return fs.Filesystem.TempFile(dir, prefix) 75 | } 76 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/src-d/gitcollector 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect 7 | github.com/Microsoft/go-winio v0.4.14 // indirect 8 | github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect 9 | github.com/cenkalti/backoff v2.2.1+incompatible 10 | github.com/containerd/continuity v0.0.0-20190827140505-75bee3e2ccb6 // indirect 11 | github.com/docker/go-connections v0.4.0 // indirect 12 | github.com/docker/go-units v0.4.0 // indirect 13 | github.com/gliderlabs/ssh v0.2.2 // indirect 14 | github.com/google/go-github/v28 v28.1.1 15 | github.com/google/uuid v1.1.1 16 | github.com/gotestyourself/gotestyourself v2.2.0+incompatible // indirect 17 | github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7 18 | github.com/kami-zh/go-capturer v0.0.0-20171211120116-e492ea43421d // indirect 19 | github.com/kevinburke/ssh_config v0.0.0-20190630040420-2e50c441276c // indirect 20 | github.com/lib/pq v1.1.1 21 | github.com/mattn/go-colorable v0.1.2 // indirect 22 | github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b // indirect 23 | github.com/onsi/ginkgo v1.8.0 // indirect 24 | github.com/onsi/gomega v1.5.0 // indirect 25 | github.com/opencontainers/go-digest v1.0.0-rc1 // indirect 26 | github.com/opencontainers/image-spec v1.0.1 // indirect 27 | github.com/opencontainers/runc 
v0.1.1 // indirect 28 | github.com/ory/dockertest v3.3.5+incompatible 29 | github.com/sirupsen/logrus v1.4.2 // indirect 30 | github.com/src-d/envconfig v1.0.0 // indirect 31 | github.com/src-d/go-borges v0.0.0-20190704083038-44867e8f2a2a 32 | github.com/stretchr/testify v1.4.0 33 | github.com/x-cray/logrus-prefixed-formatter v0.5.2 // indirect 34 | golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 // indirect 35 | golang.org/x/net v0.0.0-20190628185345-da137c7871d7 // indirect 36 | golang.org/x/oauth2 v0.0.0-20190523182746-aaccbc9213b0 37 | golang.org/x/sync v0.0.0-20190423024810-112230192c58 // indirect 38 | golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 // indirect 39 | gopkg.in/src-d/go-billy.v4 v4.3.1 40 | gopkg.in/src-d/go-cli.v0 v0.0.0-20190422143124-3a646154da79 41 | gopkg.in/src-d/go-errors.v1 v1.0.0 42 | gopkg.in/src-d/go-git.v4 v4.12.0 43 | gopkg.in/src-d/go-log.v1 v1.0.2 44 | gopkg.in/yaml.v2 v2.2.4 // indirect 45 | gotest.tools v2.2.0+incompatible // indirect 46 | ) 47 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | pipeline { 2 | agent { 3 | kubernetes { 4 | label 'regression-retrieval' 5 | inheritFrom 'performance' 6 | defaultContainer 'regression-retrieval' 7 | nodeSelector 'srcd.host/type=jenkins-worker' 8 | containerTemplate { 9 | name 'regression-retrieval' 10 | image 'srcd/regression-retrieval:latest' 11 | ttyEnabled true 12 | command 'cat' 13 | } 14 | } 15 | } 16 | environment { 17 | GOPATH = "/go" 18 | GO_IMPORT_PATH = "github.com/src-d/regression-retrieval" 19 | GO_IMPORT_FULL_PATH = "${env.GOPATH}/src/${env.GO_IMPORT_PATH}" 20 | GO111MODULE = "on" 21 | PROM_ADDRESS = "http://prom-pushgateway-prometheus-pushgateway.monitoring.svc.cluster.local:9091" 22 | PROM_JOB = "retrieval_performance" 23 | } 24 | triggers { pollSCM('0 0,12 * * *') } 25 | stages { 26 | stage('Run performance tests') { 27 | when { 
branch 'master' } 28 | steps { 29 | sh '/bin/regression-retrieval --kind=gitcollector --csv --prom local:HEAD' 30 | } 31 | } 32 | stage('PR-run') { 33 | when { changeRequest target: 'master' } 34 | steps { 35 | sh '/bin/regression-retrieval --kind=gitcollector remote:master local:HEAD' 36 | } 37 | } 38 | stage('Plot') { 39 | when { branch 'master' } 40 | steps { 41 | script { 42 | plotFiles = findFiles(glob: "plot_*.csv") 43 | plotFiles.each { 44 | echo "plot ${it.getName()}" 45 | sh "cat ${it.getName()}" 46 | plot( 47 | group: 'performance', 48 | csvFileName: it.getName(), 49 | title: it.getName(), 50 | numBuilds: '100', 51 | style: 'line', 52 | csvSeries: [[ 53 | displayTableFlag: false, 54 | exclusionValues: '', 55 | file: it.getName(), 56 | inclusionFlag: 'OFF', 57 | ]] 58 | ) 59 | } 60 | } 61 | } 62 | } 63 | } 64 | post { 65 | success { 66 | slackSend (color: '#2eb886', message: "SUCCESS: `${env.JOB_NAME}` <${env.BUILD_URL}|build #${env.BUILD_NUMBER}>") 67 | } 68 | failure { 69 | slackSend (color: '#b82e60', message: "FAILED: `${env.JOB_NAME}` <${env.BUILD_URL}|build #${env.BUILD_NUMBER}>") 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /metrics/metrics_db.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | 8 | "github.com/src-d/gitcollector/library" 9 | 10 | // postgres database driver 11 | _ "github.com/lib/pq" 12 | ) 13 | 14 | // PrepareDB performs the necessary operations to send metrics to a postgres 15 | // database. 
16 | func PrepareDB(uri string, table string, orgs []string) (*sql.DB, error) { 17 | db, err := sql.Open("postgres", uri) 18 | if err != nil { 19 | return nil, err 20 | } 21 | 22 | if err := db.Ping(); err != nil { 23 | return nil, err 24 | } 25 | 26 | statements := []string{ 27 | fmt.Sprintf(create, table), 28 | fmt.Sprintf(addColumns, table), 29 | } 30 | 31 | for _, org := range orgs { 32 | statements = append(statements, 33 | fmt.Sprintf(insert, table, org)) 34 | } 35 | 36 | tx, err := db.Begin() 37 | if err != nil { 38 | db.Close() 39 | return nil, err 40 | } 41 | 42 | for _, s := range statements { 43 | if _, err := tx.Exec(s); err != nil { 44 | tx.Rollback() 45 | db.Close() 46 | return nil, err 47 | } 48 | } 49 | 50 | if err := tx.Commit(); err != nil { 51 | db.Close() 52 | return nil, err 53 | } 54 | 55 | return db, nil 56 | } 57 | 58 | const ( 59 | create = `CREATE TABLE IF NOT EXISTS %s ( 60 | org VARCHAR(50) NOT NULL, 61 | discovered INTEGER NOT NULL, 62 | downloaded INTEGER NOT NULL, 63 | updated INTEGER NOT NULL, 64 | failed INTEGER NOT NULL 65 | )` 66 | 67 | insert = `INSERT INTO %[1]s(org, discovered, downloaded, updated, failed) 68 | SELECT '%[2]s',0,0,0,0 69 | WHERE NOT EXISTS (SELECT * FROM %[1]s WHERE org='%[2]s')` 70 | 71 | addColumns = `ALTER TABLE %s 72 | ADD COLUMN IF NOT EXISTS discovered INTEGER, 73 | ADD COLUMN IF NOT EXISTS downloaded INTEGER, 74 | ADD COLUMN IF NOT EXISTS updated INTEGER, 75 | ADD COLUMN IF NOT EXISTS failed INTEGER` 76 | 77 | update = `UPDATE %s 78 | SET discovered = %d, 79 | downloaded = %d, 80 | updated = %d, 81 | failed = %d 82 | WHERE org = '%s';` 83 | ) 84 | 85 | // SendToDB is a SendFn to persist metrics on a database. 
86 | func SendToDB(db *sql.DB, table, org string) SendFn { 87 | return func( 88 | ctx context.Context, 89 | mc *Collector, 90 | _ *library.Job, 91 | ) error { 92 | statement := fmt.Sprintf( 93 | update, 94 | table, 95 | mc.discoverCount, 96 | mc.successDownloadCount, 97 | mc.successUpdateCount, 98 | mc.failCount, 99 | org, 100 | ) 101 | 102 | _, err := db.ExecContext(ctx, statement) 103 | return err 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /scheduler.go: -------------------------------------------------------------------------------- 1 | package gitcollector 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/jpillora/backoff" 8 | "gopkg.in/src-d/go-errors.v1" 9 | ) 10 | 11 | var ( 12 | // ErrNewJobsNotFound must be returned by a JobScheduleFn when it can't 13 | // find new Jobs. 14 | ErrNewJobsNotFound = errors.NewKind( 15 | "couldn't find new jobs to schedule") 16 | 17 | // ErrJobSource must be returned by a JobScheduleFn when the source of 18 | // job is closed. 19 | ErrJobSource = errors.NewKind("job source is closed") 20 | ) 21 | 22 | // JobScheduleFn is a function to schedule the next Job. 
23 | type JobScheduleFn func(context.Context) (Job, error) 24 | 25 | type jobScheduler struct { 26 | jobs chan Job 27 | schedule JobScheduleFn 28 | cancel chan struct{} 29 | opts *WorkerPoolOpts 30 | backoff *backoff.Backoff 31 | } 32 | 33 | const ( 34 | schedCapacity = 1000 35 | schedTimeout = 5 * time.Second 36 | 37 | // backoff default configuration 38 | backoffMinDuration = 250 * time.Millisecond 39 | backoffMaxDuration = 1024 * time.Second 40 | backoffFactor = 2 41 | backoffJitter = true 42 | ) 43 | 44 | func newJobScheduler( 45 | schedule JobScheduleFn, 46 | opts *WorkerPoolOpts, 47 | ) *jobScheduler { 48 | if opts.SchedulerCapacity <= 0 { 49 | opts.SchedulerCapacity = schedCapacity 50 | } 51 | 52 | if opts.ScheduleJobTimeout <= 0 { 53 | opts.ScheduleJobTimeout = schedTimeout 54 | } 55 | 56 | return &jobScheduler{ 57 | jobs: make(chan Job, opts.SchedulerCapacity), 58 | schedule: schedule, 59 | cancel: make(chan struct{}), 60 | opts: opts, 61 | backoff: &backoff.Backoff{ 62 | Min: backoffMinDuration, 63 | Max: backoffMaxDuration, 64 | Factor: backoffFactor, 65 | Jitter: backoffJitter, 66 | }, 67 | } 68 | } 69 | 70 | func (s *jobScheduler) finish() { 71 | s.cancel <- struct{}{} 72 | } 73 | 74 | func (s *jobScheduler) Schedule() { 75 | s.backoff.Reset() 76 | for { 77 | select { 78 | case <-s.cancel: 79 | return 80 | default: 81 | ctx, cancel := context.WithTimeout( 82 | context.Background(), 83 | s.opts.ScheduleJobTimeout, 84 | ) 85 | 86 | defer cancel() 87 | job, err := s.schedule(ctx) 88 | if err != nil { 89 | if ErrNewJobsNotFound.Is(err) { 90 | if s.opts.NotWaitNewJobs { 91 | continue 92 | } 93 | 94 | } 95 | 96 | if ErrJobSource.Is(err) { 97 | close(s.jobs) 98 | return 99 | } 100 | 101 | select { 102 | case <-s.cancel: 103 | return 104 | case <-time.After(s.backoff.Duration()): 105 | } 106 | 107 | continue 108 | } 109 | 110 | select { 111 | case s.jobs <- job: 112 | s.backoff.Reset() 113 | s.opts.Metrics.Discover(job) 114 | case <-s.cancel: 115 | return 116 
| } 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /library/job_test.go: -------------------------------------------------------------------------------- 1 | package library 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "testing" 7 | 8 | "github.com/src-d/gitcollector" 9 | "github.com/stretchr/testify/require" 10 | "gopkg.in/src-d/go-log.v1" 11 | ) 12 | 13 | func TestJobScheduleFn(t *testing.T) { 14 | var ( 15 | endpoints = []string{ 16 | "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", 17 | } 18 | 19 | mu sync.Mutex 20 | got []string 21 | processFn = func(_ context.Context, j *Job) error { 22 | mu.Lock() 23 | defer mu.Unlock() 24 | 25 | got = append(got, j.Endpoints()[0]) 26 | return nil 27 | } 28 | ) 29 | 30 | download := make(chan gitcollector.Job, 2) 31 | update := make(chan gitcollector.Job, 20) 32 | sched := NewJobScheduleFn( 33 | nil, 34 | download, update, 35 | processFn, processFn, 36 | false, 37 | nil, 38 | log.New(nil), 39 | nil, 40 | ) 41 | 42 | queues := []chan gitcollector.Job{download, update} 43 | expected := testScheduleFn(sched, endpoints, queues) 44 | require.ElementsMatch(t, expected, got) 45 | } 46 | 47 | func TestDownloadJobScheduleFn(t *testing.T) { 48 | var ( 49 | endpoints = []string{ 50 | "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", 51 | } 52 | 53 | mu sync.Mutex 54 | got []string 55 | processFn = func(_ context.Context, j *Job) error { 56 | mu.Lock() 57 | defer mu.Unlock() 58 | 59 | got = append(got, j.Endpoints()[0]) 60 | return nil 61 | } 62 | ) 63 | 64 | download := make(chan gitcollector.Job, 5) 65 | sched := NewDownloadJobScheduleFn( 66 | nil, 67 | download, 68 | processFn, 69 | false, 70 | nil, 71 | log.New(nil), 72 | nil, 73 | ) 74 | 75 | queues := []chan gitcollector.Job{download} 76 | expected := testScheduleFn(sched, endpoints, queues) 77 | require.ElementsMatch(t, expected, got) 78 | } 79 | 80 | func TestUpdateJobScheduleFn(t *testing.T) { 81 | var ( 82 | 
endpoints = []string{ 83 | "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", 84 | } 85 | 86 | mu sync.Mutex 87 | got []string 88 | processFn = func(_ context.Context, j *Job) error { 89 | mu.Lock() 90 | defer mu.Unlock() 91 | 92 | got = append(got, j.Endpoints()[0]) 93 | return nil 94 | } 95 | ) 96 | 97 | update := make(chan gitcollector.Job, 5) 98 | sched := NewUpdateJobScheduleFn( 99 | nil, update, processFn, nil, log.New(nil), 100 | ) 101 | queues := []chan gitcollector.Job{update} 102 | expected := testScheduleFn(sched, endpoints, queues) 103 | require.ElementsMatch(t, expected, got) 104 | } 105 | 106 | func testScheduleFn( 107 | sched gitcollector.JobScheduleFn, 108 | endpoints []string, 109 | queues []chan gitcollector.Job, 110 | ) []string { 111 | wp := gitcollector.NewWorkerPool( 112 | sched, 113 | &gitcollector.WorkerPoolOpts{ 114 | NotWaitNewJobs: true, 115 | }, 116 | ) 117 | 118 | wp.SetWorkers(10) 119 | wp.Run() 120 | 121 | for _, e := range endpoints { 122 | for i, queue := range queues { 123 | var t JobType = JobDownload 124 | if i != 0 { 125 | t = JobUpdate 126 | } 127 | 128 | queue <- &Job{ 129 | Type: t, 130 | endpoints: []string{e}, 131 | } 132 | } 133 | } 134 | 135 | var expected []string 136 | for _, queue := range queues { 137 | expected = append(expected, endpoints...) 
138 | close(queue) 139 | } 140 | 141 | wp.Wait() 142 | return expected 143 | } 144 | -------------------------------------------------------------------------------- /updater/update_test.go: -------------------------------------------------------------------------------- 1 | package updater 2 | 3 | import ( 4 | "context" 5 | "io/ioutil" 6 | "os" 7 | "testing" 8 | 9 | "github.com/src-d/gitcollector/library" 10 | "github.com/src-d/go-borges" 11 | "github.com/src-d/go-borges/siva" 12 | "gopkg.in/src-d/go-billy.v4" 13 | "gopkg.in/src-d/go-billy.v4/osfs" 14 | "gopkg.in/src-d/go-log.v1" 15 | 16 | "github.com/stretchr/testify/require" 17 | ) 18 | 19 | func TestUpdate(t *testing.T) { 20 | var req = require.New(t) 21 | 22 | locID := borges.LocationID( 23 | "f2cee90acf3c6644d51a37057845b98ab1580932") 24 | 25 | endpoints := []string{ 26 | // 263 commits main 27 | "git://github.com/jtoy/awesome-tensorflow.git", 28 | // 257 commits forked 29 | "git://github.com/SiweiLuo/awesome-tensorflow.git", 30 | // 257 commits forked 31 | "git://github.com/youtang1993/awesome-tensorflow.git", 32 | } 33 | 34 | dir1, err := ioutil.TempDir("", "gitcollector") 35 | req.NoError(err) 36 | defer os.RemoveAll(dir1) 37 | 38 | lib1, loc1 := setupLocation(t, dir1, locID, endpoints) 39 | 40 | repo, err := loc1.Get("", borges.ReadOnlyMode) 41 | req.NoError(err) 42 | 43 | _, err = repo.FS().Stat("objects") 44 | req.True(os.IsNotExist(err)) 45 | 46 | job := &library.Job{ 47 | ID: "foo", 48 | Type: library.JobUpdate, 49 | Lib: lib1, 50 | LocationID: locID, 51 | AuthToken: func(string) string { return "" }, 52 | Logger: log.New(nil), 53 | } 54 | 55 | // Update all remotes 56 | req.NoError(Update(context.TODO(), job)) 57 | 58 | repo, err = loc1.Get("", borges.ReadOnlyMode) 59 | req.NoError(err) 60 | 61 | _, err = repo.FS().Stat("objects") 62 | req.NoError(err) 63 | size1 := objectsSize(t, repo.FS()) 64 | 65 | dir2, err := ioutil.TempDir("", "gitcollector") 66 | req.NoError(err) 67 | defer os.RemoveAll(dir2) 
68 | 69 | lib2, loc2 := setupLocation(t, dir2, locID, endpoints) 70 | 71 | repo, err = loc2.Get("", borges.ReadOnlyMode) 72 | req.NoError(err) 73 | 74 | _, err = repo.FS().Stat("objects") 75 | req.True(os.IsNotExist(err)) 76 | 77 | job.Lib = lib2 78 | job.SetEndpoints([]string{endpoints[1]}) 79 | 80 | // Update just one remote 81 | req.NoError(Update(context.TODO(), job)) 82 | 83 | repo, err = loc2.Get("", borges.ReadOnlyMode) 84 | req.NoError(err) 85 | 86 | _, err = repo.FS().Stat("objects") 87 | req.NoError(err) 88 | size2 := objectsSize(t, repo.FS()) 89 | 90 | req.True(size1 > size2) 91 | } 92 | 93 | func setupLocation( 94 | t *testing.T, 95 | path string, 96 | locID borges.LocationID, 97 | endpoints []string, 98 | ) (borges.Library, *siva.Location) { 99 | t.Helper() 100 | var req = require.New(t) 101 | 102 | fs := osfs.New(path) 103 | lib, err := siva.NewLibrary("test", fs, &siva.LibraryOptions{ 104 | Transactional: true, 105 | }) 106 | req.NoError(err) 107 | 108 | l, err := lib.AddLocation(locID) 109 | req.NoError(err) 110 | 111 | loc, ok := l.(*siva.Location) 112 | req.True(ok) 113 | 114 | for _, ep := range endpoints { 115 | repoID, err := borges.NewRepositoryID(ep) 116 | req.NoError(err) 117 | 118 | repo, err := loc.Init(repoID) 119 | req.NoError(err) 120 | req.NoError(repo.Commit()) 121 | } 122 | 123 | return lib, loc 124 | } 125 | 126 | func objectsSize(t *testing.T, fs billy.Filesystem) int64 { 127 | t.Helper() 128 | var req = require.New(t) 129 | 130 | entries, err := fs.ReadDir("objects/pack") 131 | req.NoError(err) 132 | 133 | var size int64 134 | for _, e := range entries { 135 | size += e.Size() 136 | } 137 | 138 | return size 139 | } 140 | -------------------------------------------------------------------------------- /integration/postgres_test.go: -------------------------------------------------------------------------------- 1 | package integration 2 | 3 | import ( 4 | "database/sql" 5 | "os" 6 | "runtime" 7 | "strings" 8 | "testing" 9 | 10 | _ 
"github.com/lib/pq" 11 | "github.com/stretchr/testify/require" 12 | "gopkg.in/src-d/go-log.v1" 13 | ) 14 | 15 | const ( 16 | orgs = "git-fixtures" 17 | 18 | dbTable = "test_table" 19 | ) 20 | 21 | // TODO https://github.com/src-d/infrastructure/issues/1130 22 | var expMetric = metric{ 23 | org: "git-fixtures", 24 | discovered: 8, 25 | downloaded: 7, 26 | updated: 0, 27 | // this failed repo is an empty one 28 | failed: 1, 29 | } 30 | 31 | type metric struct { 32 | org string 33 | discovered int 34 | downloaded int 35 | updated int 36 | failed int 37 | } 38 | 39 | // TODO maybe we can somehow mock up failure of write metrics query? 40 | func TestPostgres(t *testing.T) { 41 | // docker service is not supported on osx https://github.com/travis-ci/travis-ci/issues/5738#issuecomment-227154200 42 | if runtime.GOOS == "darwin" { 43 | t.Skip("cannot run these tests on osx") 44 | } 45 | 46 | if os.Getenv("GITHUB_TOKEN") == "" { 47 | t.Skip("github token not defined") 48 | } 49 | 50 | h, err := NewHelper(orgs) 51 | require.NoError(t, err) 52 | defer h.Close() 53 | 54 | for _, tst := range []struct { 55 | name string 56 | fnc func(t *testing.T, h *helper) 57 | }{ 58 | {"testPostgresEmptyURI", testPostgresEmptyURI}, 59 | {"testBrokenPostgresEndpoint", testBrokenPostgresEndpoint}, 60 | {"testPostgresCreateSchemaFail", testPostgresCreateSchemaFail}, 61 | {"testPostgresSendMetricsSuccess", testPostgresSendMetricsSuccess}, 62 | } { 63 | tst := tst 64 | t.Run(tst.name, func(t *testing.T) { 65 | defer func() { h.Cleanup() }() 66 | tst.fnc(t, h) 67 | }) 68 | } 69 | } 70 | 71 | func testPostgresEmptyURI(t *testing.T, h *helper) { 72 | h.cmd.MetricsDBURI = "" 73 | require.NoError(t, h.Exec()) 74 | } 75 | 76 | func testBrokenPostgresEndpoint(t *testing.T, h *helper) { 77 | h.cmd.MetricsDBURI = "postgres://broken:5432" 78 | 79 | err := h.Exec() 80 | require.Error(t, err) 81 | 82 | if strings.Contains(err.Error(), "lookup broken") { 83 | require.Contains(t, err.Error(), "lookup broken") 
84 | } else { 85 | require.Contains(t, err.Error(), "no such host") 86 | } 87 | } 88 | 89 | func testPostgresCreateSchemaFail(t *testing.T, h *helper) { 90 | dbURI, err := h.CreateDB() 91 | log.Infof("dbURI: %v", dbURI) 92 | require.NoError(t, err) 93 | 94 | h.cmd.MetricsDBURI = dbURI 95 | h.cmd.MetricsDBTable = "" 96 | 97 | require.Error(t, h.Exec()) 98 | } 99 | 100 | func testPostgresSendMetricsSuccess(t *testing.T, h *helper) { 101 | dbURI, err := h.CreateDB() 102 | require.NoError(t, err) 103 | 104 | h.cmd.MetricsDBURI = dbURI 105 | h.cmd.MetricsDBTable = dbTable 106 | 107 | require.NoError(t, h.Exec()) 108 | 109 | res, err := parseTable(dbURI, dbTable) 110 | require.NoError(t, err) 111 | require.Equal(t, []metric{expMetric}, res) 112 | } 113 | 114 | func parseTable(dbURI, table string) (res []metric, err error) { 115 | cli, err := sql.Open("postgres", dbURI) 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | rows, err := cli.Query("select * from " + table) 121 | if err != nil { 122 | return nil, err 123 | } 124 | 125 | for rows.Next() { 126 | var m metric 127 | err = rows.Scan( 128 | &m.org, 129 | &m.discovered, 130 | &m.downloaded, 131 | &m.updated, 132 | &m.failed, 133 | ) 134 | if err != nil { 135 | return nil, err 136 | } 137 | res = append(res, m) 138 | } 139 | return 140 | } 141 | -------------------------------------------------------------------------------- /provider/updates_test.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | "time" 7 | 8 | "github.com/src-d/gitcollector" 9 | "github.com/src-d/gitcollector/library" 10 | "github.com/src-d/go-borges" 11 | "github.com/src-d/go-borges/plain" 12 | "github.com/src-d/go-borges/util" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func TestUpdates(t *testing.T) { 17 | var require = require.New(t) 18 | require.True(true) 19 | 20 | ids := []borges.LocationID{ 21 | "a", "b", "c", "d", 
"e", "f", 22 | } 23 | 24 | lib := &testLib{locIDs: ids[:3]} 25 | 26 | queue := make(chan gitcollector.Job, 30) 27 | provider := NewUpdates(lib, queue, &UpdatesOpts{ 28 | TriggerOnce: true, 29 | TriggerInterval: 500 * time.Microsecond, 30 | }) 31 | 32 | go runProvider(t, provider) 33 | 34 | time.Sleep(100 * time.Millisecond) 35 | require.Len(lib.locIDs, len(queue)) 36 | 37 | provider.opts.TriggerOnce = false 38 | go runProvider(t, provider) 39 | 40 | time.Sleep(200 * time.Microsecond) 41 | for _, id := range ids[3:] { 42 | lib.addLocationID(id) 43 | } 44 | 45 | time.Sleep(250 * time.Millisecond) 46 | require.NoError(provider.Stop()) 47 | 48 | require.Len(queue, cap(queue)) 49 | for i := 0; i < cap(queue); i++ { 50 | job := <-queue 51 | j, ok := job.(*library.Job) 52 | require.True(ok) 53 | require.Contains(ids, j.LocationID) 54 | require.True(j.Type == library.JobUpdate) 55 | } 56 | } 57 | 58 | func runProvider(t *testing.T, provider *Updates) { 59 | t.Helper() 60 | require.True( 61 | t, 62 | ErrUpdatesStopped.Is(provider.Start()), 63 | ) 64 | } 65 | 66 | type testLib struct { 67 | mu sync.RWMutex 68 | locIDs []borges.LocationID 69 | } 70 | 71 | var _ borges.Library = (*testLib)(nil) 72 | 73 | func (l *testLib) addLocationID(id borges.LocationID) { 74 | l.mu.Lock() 75 | l.locIDs = append(l.locIDs, id) 76 | l.mu.Unlock() 77 | } 78 | 79 | func (l *testLib) ID() borges.LibraryID { return "test" } 80 | 81 | func (l *testLib) Init(id borges.RepositoryID) (borges.Repository, error) { 82 | return nil, borges.ErrNotImplemented.New() 83 | } 84 | 85 | func (l *testLib) Get( 86 | id borges.RepositoryID, 87 | mode borges.Mode, 88 | ) (borges.Repository, error) { 89 | return nil, borges.ErrNotImplemented.New() 90 | } 91 | 92 | func (l *testLib) GetOrInit(id borges.RepositoryID) (borges.Repository, error) { 93 | return nil, borges.ErrNotImplemented.New() 94 | } 95 | 96 | func (l *testLib) Has( 97 | id borges.RepositoryID, 98 | ) (bool, borges.LibraryID, borges.LocationID, error) 
{ 99 | return false, "", "", borges.ErrNotImplemented.New() 100 | } 101 | 102 | func (l *testLib) Repositories( 103 | mode borges.Mode, 104 | ) (borges.RepositoryIterator, error) { 105 | return nil, borges.ErrNotImplemented.New() 106 | } 107 | 108 | func (l *testLib) Location(id borges.LocationID) (borges.Location, error) { 109 | return nil, borges.ErrNotImplemented.New() 110 | } 111 | 112 | func (l *testLib) Locations() (borges.LocationIterator, error) { 113 | l.mu.RLock() 114 | defer l.mu.RUnlock() 115 | 116 | var locs []borges.Location 117 | for _, id := range l.locIDs { 118 | loc, err := plain.NewLocation(id, nil, nil) 119 | if err != nil { 120 | return nil, err 121 | } 122 | 123 | locs = append(locs, loc) 124 | } 125 | 126 | return util.NewLocationIterator(locs), nil 127 | } 128 | 129 | func (l *testLib) Library(id borges.LibraryID) (borges.Library, error) { 130 | return nil, borges.ErrNotImplemented.New() 131 | } 132 | 133 | func (l *testLib) Libraries() (borges.LibraryIterator, error) { 134 | return nil, borges.ErrNotImplemented.New() 135 | } 136 | -------------------------------------------------------------------------------- /worker_pool.go: -------------------------------------------------------------------------------- 1 | package gitcollector 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | // WorkerPoolOpts are configuration options for a JobScheduler. 9 | type WorkerPoolOpts struct { 10 | SchedulerCapacity int 11 | ScheduleJobTimeout time.Duration 12 | NotWaitNewJobs bool 13 | Metrics MetricsCollector 14 | } 15 | 16 | // WorkerPool holds a pool of workers to process Jobs. 17 | type WorkerPool struct { 18 | scheduler *jobScheduler 19 | workers []*worker 20 | resize chan struct{} 21 | wg sync.WaitGroup 22 | opts *WorkerPoolOpts 23 | } 24 | 25 | // NewWorkerPool builds a new WorkerPool. 
26 | func NewWorkerPool( 27 | schedule JobScheduleFn, 28 | opts *WorkerPoolOpts, 29 | ) *WorkerPool { 30 | resize := make(chan struct{}, 1) 31 | resize <- struct{}{} 32 | if opts.Metrics == nil { 33 | opts.Metrics = &hollowMetricsCollector{} 34 | } 35 | 36 | return &WorkerPool{ 37 | scheduler: newJobScheduler(schedule, opts), 38 | resize: resize, 39 | opts: opts, 40 | } 41 | } 42 | 43 | // Run notify workers to start. 44 | func (wp *WorkerPool) Run() { 45 | go wp.opts.Metrics.Start() 46 | go wp.scheduler.Schedule() 47 | } 48 | 49 | // Size returns the current number of workers in the pool. 50 | func (wp *WorkerPool) Size() int { 51 | <-wp.resize 52 | defer func() { wp.resize <- struct{}{} }() 53 | 54 | return len(wp.workers) 55 | } 56 | 57 | // SetWorkers set the number of Workers in the pool to n. 58 | func (wp *WorkerPool) SetWorkers(n int) { 59 | <-wp.resize 60 | defer func() { wp.resize <- struct{}{} }() 61 | 62 | if n < 0 { 63 | n = 0 64 | } 65 | 66 | diff := n - len(wp.workers) 67 | if diff == 0 { 68 | return 69 | } else if diff > 0 { 70 | wp.add(diff) 71 | } else { 72 | wp.remove(-diff) 73 | } 74 | } 75 | 76 | func (wp *WorkerPool) add(n int) { 77 | wp.wg.Add(n) 78 | for i := 0; i < n; i++ { 79 | w := newWorker(wp.scheduler.jobs, wp.opts.Metrics) 80 | go func() { 81 | w.start() 82 | wp.wg.Done() 83 | }() 84 | 85 | wp.workers = append(wp.workers, w) 86 | } 87 | } 88 | 89 | func (wp *WorkerPool) remove(n int) { 90 | var ( 91 | i = len(wp.workers) - n 92 | workersToStop = wp.workers[i:] 93 | wg sync.WaitGroup 94 | ) 95 | 96 | wg.Add(len(workersToStop)) 97 | for _, w := range workersToStop { 98 | worker := w 99 | go func() { 100 | worker.stop(false) 101 | wg.Done() 102 | }() 103 | } 104 | 105 | wp.workers = wp.workers[:i] 106 | wg.Wait() 107 | } 108 | 109 | // Wait waits for the workers to finish. 
func (wp *WorkerPool) Wait() {
	// Blocks until every worker goroutine has returned, then drops the
	// worker slice and stops the metrics collector gracefully.
	wp.wg.Wait()
	wp.workers = nil
	wp.opts.Metrics.Stop(false)
}

// Close stops all the workers in the pool waiting for the jobs to finish.
func (wp *WorkerPool) Close() {
	wp.SetWorkers(0)
	wp.wg.Wait()
	wp.scheduler.finish()
	wp.opts.Metrics.Stop(false)
}

// Stop stops all the workers in the pool immediately.
func (wp *WorkerPool) Stop() {
	<-wp.resize
	defer func() { wp.resize <- struct{}{} }()

	// stop(true) aborts the in-flight job instead of letting it finish.
	for _, w := range wp.workers {
		w.stop(true)
	}

	wp.wg.Wait()
	wp.workers = nil
	wp.scheduler.finish()
	wp.opts.Metrics.Stop(true)
}

// hollowMetricsCollector is a no-op MetricsCollector used when the pool is
// built without one, so call sites never need a nil check.
type hollowMetricsCollector struct{}

var _ MetricsCollector = (*hollowMetricsCollector)(nil)

func (mc *hollowMetricsCollector) Start()       {}
func (mc *hollowMetricsCollector) Stop(bool)    {}
func (mc *hollowMetricsCollector) Success(Job)  {}
func (mc *hollowMetricsCollector) Fail(Job)     {}
func (mc *hollowMetricsCollector) Discover(Job) {}
-------------------------------------------------------------------------------- /provider/updates.go: --------------------------------------------------------------------------------
package provider

import (
	"time"

	"github.com/src-d/gitcollector"
	"github.com/src-d/gitcollector/library"
	"github.com/src-d/go-borges"
	"gopkg.in/src-d/go-errors.v1"
)

var (
	// ErrUpdatesStopped is returned when a provider has been stopped.
	ErrUpdatesStopped = errors.NewKind("provider stopped")

	// ErrUpdatesStop is returned when a provider fails on Stop.
	ErrUpdatesStop = errors.NewKind("provider failed on stop")
)

// UpdatesOpts represents configuration options for an Updates.
type UpdatesOpts struct {
	// TriggerOnce triggers the update just once and exits.
	TriggerOnce bool
	// TriggerInterval is the time interval elapsed between updates.
	TriggerInterval time.Duration
	// EnqueueTimeout is the time a job waits to be enqueued.
	EnqueueTimeout time.Duration
	// StopTimeout is the time the service waits to be stopped after a Stop
	// call is performed.
	StopTimeout time.Duration
}

// Updates is a gitcollector.Provider implementation. It will periodically
// trigger the gitcollector.Jobs production to update the git repositories hold
// in a borges.Library
type Updates struct {
	lib    borges.Library
	queue  chan<- gitcollector.Job
	cancel chan struct{}
	opts   *UpdatesOpts
}

var _ gitcollector.Provider = (*Updates)(nil)

const (
	triggerInterval = 24 * 7 * time.Hour
	// NOTE(review): 500 microseconds is a very tight default for
	// StopTimeout — the unit looks like it was meant to be Millisecond.
	// Confirm before changing; Stop's behavior depends on it.
	stopTimeout    = 500 * time.Microsecond
	enqueueTimeout = 500 * time.Second
)

// NewUpdates builds a new Updates.
func NewUpdates(
	lib borges.Library,
	queue chan<- gitcollector.Job,
	opts *UpdatesOpts,
) *Updates {
	// A nil opts is allowed; every non-positive field falls back to the
	// package defaults above.
	if opts == nil {
		opts = &UpdatesOpts{}
	}

	if opts.TriggerInterval <= 0 {
		opts.TriggerInterval = triggerInterval
	}

	if opts.StopTimeout <= 0 {
		opts.StopTimeout = stopTimeout
	}

	if opts.EnqueueTimeout <= 0 {
		opts.EnqueueTimeout = enqueueTimeout
	}

	return &Updates{
		lib:    lib,
		queue:  queue,
		cancel: make(chan struct{}),
		opts:   opts,
	}
}

// Start implements the gitcollector.Provider interface.
82 | func (p *Updates) Start() error { 83 | if err := p.update(); err != nil { 84 | return err 85 | } 86 | 87 | if p.opts.TriggerOnce { 88 | return ErrUpdatesStopped.New() 89 | } 90 | 91 | for { 92 | select { 93 | case <-p.cancel: 94 | return ErrUpdatesStopped.New() 95 | case <-time.After(p.opts.TriggerInterval): 96 | if err := p.update(); err != nil { 97 | return err 98 | } 99 | } 100 | } 101 | } 102 | 103 | var errEnqueueTimeout = errors.NewKind("update queue is full") 104 | 105 | func (p *Updates) update() error { 106 | var done = make(chan error) 107 | go func() { 108 | defer close(done) 109 | 110 | iter, err := p.lib.Locations() 111 | if err != nil { 112 | done <- err 113 | return 114 | } 115 | 116 | iter.ForEach(func(l borges.Location) error { 117 | job := &library.Job{ 118 | Type: library.JobUpdate, 119 | LocationID: l.ID(), 120 | } 121 | 122 | select { 123 | case p.queue <- job: 124 | return nil 125 | case <-time.After(p.opts.EnqueueTimeout): 126 | return errEnqueueTimeout.New() 127 | } 128 | }) 129 | }() 130 | 131 | select { 132 | case <-p.cancel: 133 | return ErrUpdatesStopped.New() 134 | case err := <-done: 135 | if err != nil { 136 | return err 137 | } 138 | } 139 | 140 | return nil 141 | } 142 | 143 | // Stop implements the gitcollector.Provider interface. 
144 | func (p *Updates) Stop() error { 145 | select { 146 | case p.cancel <- struct{}{}: 147 | return nil 148 | case <-time.After(p.opts.StopTimeout): 149 | return ErrUpdatesStop.New() 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /integration/helper.go: -------------------------------------------------------------------------------- 1 | package integration 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "io/ioutil" 7 | "math/rand" 8 | "os" 9 | "path/filepath" 10 | "time" 11 | 12 | "github.com/src-d/gitcollector/cmd/gitcollector/subcmd" 13 | 14 | "github.com/ory/dockertest" 15 | ) 16 | 17 | const ( 18 | dockerImage = "postgres" 19 | dockerVersion = "9.6" 20 | ) 21 | 22 | type helper struct { 23 | address string 24 | cmd subcmd.DownloadCmd 25 | cli *sql.DB 26 | closers []func() 27 | } 28 | 29 | func NewHelper(orgs string) (*helper, error) { 30 | lib, err := ioutil.TempDir("", "gcol-lib") 31 | if err != nil { 32 | removePaths(lib) 33 | return nil, err 34 | } 35 | 36 | tmp, err := ioutil.TempDir("", "gcol-tmp") 37 | if err != nil { 38 | removePaths(tmp, lib) 39 | return nil, err 40 | } 41 | 42 | addr, contClose, err := preparePostgres() 43 | if err != nil { 44 | return nil, err 45 | } 46 | 47 | cli, err := sql.Open("postgres", getURI(addr)) 48 | if err != nil { 49 | contClose() 50 | return nil, err 51 | } 52 | 53 | return &helper{ 54 | address: addr, 55 | cmd: subcmd.DownloadCmd{ 56 | Orgs: orgs, 57 | LibPath: lib, 58 | TmpPath: tmp, 59 | Token: os.Getenv("GITHUB_TOKEN"), 60 | }, 61 | cli: cli, 62 | closers: []func(){ 63 | func() { 64 | cli.Close() 65 | contClose() 66 | }, 67 | contClose, 68 | }, 69 | }, nil 70 | } 71 | 72 | func (h *helper) Exec() error { return h.cmd.Execute(nil) } 73 | 74 | func (h *helper) CreateDB() (string, error) { 75 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 76 | dbTestName := fmt.Sprintf("testing%d", r.Uint64()) 77 | _, err := h.cli.Exec("create database " + dbTestName) 
78 | if err != nil { 79 | return "", err 80 | } 81 | 82 | return h.getDBURI(dbTestName), nil 83 | } 84 | 85 | func (h *helper) getDBURI(dnName string) string { 86 | //"postgres://testing:testing@0.0.0.0:5432/%s?sslmode=disable&connect_timeout=10" 87 | return "postgres://postgres:postgres@" + h.address + "/" + dnName + "?sslmode=disable&connect_timeout=10" 88 | } 89 | 90 | func (h *helper) Cleanup() { 91 | removeDirsContents(h.cmd.TmpPath, h.cmd.LibPath) 92 | } 93 | 94 | func (h *helper) Close() { 95 | for _, c := range h.closers { 96 | c() 97 | } 98 | removePaths(h.cmd.TmpPath, h.cmd.LibPath) 99 | } 100 | 101 | // preparePostgres runs postgres container and waits until endpoint is accessible 102 | // returns endpoint's hostport, container close method and error 103 | func preparePostgres() (string, func(), error) { 104 | pool, err := dockertest.NewPool("") 105 | if err != nil { 106 | return "", func() {}, err 107 | } 108 | 109 | cont, err := pool.Run(dockerImage, dockerVersion, []string{ 110 | "POSTGRES_PASSWORD=postgres", 111 | }) 112 | if err != nil { 113 | return "", func() {}, err 114 | } 115 | 116 | const port = "5432/tcp" 117 | addr := cont.GetHostPort(port) 118 | if err := pool.Retry(func() error { 119 | cli, err := sql.Open("postgres", getURI(addr)) 120 | if err != nil { 121 | return err 122 | } 123 | defer cli.Close() 124 | return cli.Ping() 125 | }); err != nil { 126 | cont.Close() 127 | return "", func() {}, err 128 | } 129 | 130 | return addr, func() { 131 | cont.Close() 132 | }, nil 133 | } 134 | 135 | func getURI(hostPort string) string { 136 | return "postgres://postgres:postgres@" + hostPort + "?sslmode=disable&connect_timeout=10" 137 | } 138 | 139 | func removePaths(paths ...string) { 140 | for _, p := range paths { 141 | os.RemoveAll(p) 142 | } 143 | } 144 | 145 | func removeDirsContents(dirs ...string) { 146 | for _, d := range dirs { 147 | removeContents(d) 148 | } 149 | } 150 | 151 | func removeContents(dir string) error { 152 | d, err := 
os.Open(dir) 153 | if err != nil { 154 | return err 155 | } 156 | defer d.Close() 157 | names, err := d.Readdirnames(-1) 158 | if err != nil { 159 | return err 160 | } 161 | for _, name := range names { 162 | err = os.RemoveAll(filepath.Join(dir, name)) 163 | if err != nil { 164 | return err 165 | } 166 | } 167 | return nil 168 | } 169 | -------------------------------------------------------------------------------- /discovery/github-iterator.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "time" 7 | 8 | "github.com/google/go-github/v28/github" 9 | "golang.org/x/oauth2" 10 | ) 11 | 12 | // GHRepositoriesIter represents an iterator of *github.Repositories 13 | type GHRepositoriesIter interface { 14 | Next(context.Context) (*github.Repository, time.Duration, error) 15 | } 16 | 17 | // GHReposIterOpts represents configuration options for a GHReposIter. 18 | type GHReposIterOpts struct { 19 | HTTPTimeout time.Duration 20 | ResultsPerPage int 21 | TimeNewRepos time.Duration 22 | AuthToken string 23 | } 24 | 25 | const ( 26 | httpTimeout = 30 * time.Second 27 | resultsPerPage = 100 28 | waitNewRepos = 24 * time.Hour 29 | ) 30 | 31 | // GHOrgReposIter is a GHRepositoriesIter by organization name. 32 | type GHOrgReposIter struct { 33 | org string 34 | excludedRepos map[string]struct{} 35 | client *github.Client 36 | repos []*github.Repository 37 | checkpoint int 38 | opts *github.RepositoryListByOrgOptions 39 | waitNewRepos time.Duration 40 | } 41 | 42 | var _ GHRepositoriesIter = (*GHOrgReposIter)(nil) 43 | 44 | // NewGHOrgReposIter builds a new GHOrgReposIter. 
45 | func NewGHOrgReposIter(org string, excludedRepos []string, opts *GHReposIterOpts) *GHOrgReposIter { 46 | if opts == nil { 47 | opts = &GHReposIterOpts{} 48 | } 49 | 50 | to := opts.HTTPTimeout 51 | if to <= 0 { 52 | to = httpTimeout 53 | } 54 | 55 | rpp := opts.ResultsPerPage 56 | if rpp <= 0 || rpp > 100 { 57 | rpp = resultsPerPage 58 | } 59 | 60 | wnr := opts.TimeNewRepos 61 | if wnr <= 0 { 62 | wnr = waitNewRepos 63 | } 64 | 65 | excludedReposSet := make(map[string]struct{}) 66 | for _, excludedRepo := range excludedRepos { 67 | excludedReposSet[excludedRepo] = struct{}{} 68 | } 69 | 70 | return &GHOrgReposIter{ 71 | org: org, 72 | excludedRepos: excludedReposSet, 73 | client: newGithubClient(opts.AuthToken, to), 74 | opts: &github.RepositoryListByOrgOptions{ 75 | ListOptions: github.ListOptions{PerPage: rpp}, 76 | }, 77 | waitNewRepos: wnr, 78 | } 79 | } 80 | 81 | func newGithubClient(token string, timeout time.Duration) *github.Client { 82 | var client *http.Client 83 | if token == "" { 84 | client = &http.Client{} 85 | } else { 86 | client = oauth2.NewClient( 87 | context.Background(), 88 | oauth2.StaticTokenSource( 89 | &oauth2.Token{AccessToken: token}, 90 | ), 91 | ) 92 | } 93 | 94 | client.Timeout = timeout 95 | return github.NewClient(client) 96 | } 97 | 98 | // Next implements the GHRepositoriesIter interface. 
99 | func (p *GHOrgReposIter) Next( 100 | ctx context.Context, 101 | ) (*github.Repository, time.Duration, error) { 102 | for { 103 | if len(p.repos) == 0 { 104 | retry, err := p.requestRepos(ctx) 105 | if err != nil && len(p.repos) == 0 { 106 | return nil, retry, err 107 | } 108 | } 109 | 110 | var next *github.Repository 111 | next, p.repos = p.repos[0], p.repos[1:] 112 | if _, ok := p.excludedRepos[next.GetName()]; !ok { 113 | return next, 0, nil 114 | } 115 | } 116 | } 117 | 118 | func (p *GHOrgReposIter) requestRepos( 119 | ctx context.Context, 120 | ) (time.Duration, error) { 121 | repos, res, err := p.client.Repositories.ListByOrg( 122 | ctx, 123 | p.org, 124 | p.opts, 125 | ) 126 | 127 | if err != nil { 128 | if _, ok := err.(*github.RateLimitError); !ok { 129 | return -1, err 130 | } 131 | 132 | return timeToRetry(res), ErrRateLimitExceeded.Wrap(err) 133 | } 134 | 135 | bufRepos := repos 136 | if p.checkpoint > 0 { 137 | i := p.checkpoint 138 | if len(repos) < p.checkpoint { 139 | // return err? 140 | i = 0 141 | } 142 | 143 | bufRepos = repos[i:] 144 | } 145 | 146 | if len(repos) < p.opts.PerPage { 147 | p.checkpoint = len(repos) 148 | } 149 | 150 | err = nil 151 | if res.NextPage == 0 { 152 | if len(repos) == p.opts.PerPage { 153 | p.opts.Page++ 154 | } 155 | 156 | err = ErrNewRepositoriesNotFound.New() 157 | } else { 158 | p.opts.Page = res.NextPage 159 | } 160 | 161 | p.repos = bufRepos 162 | return p.waitNewRepos, err 163 | } 164 | 165 | func timeToRetry(res *github.Response) time.Duration { 166 | now := time.Now().UTC().Unix() 167 | resetTime := res.Rate.Reset.UTC().Unix() 168 | timeToReset := time.Duration(resetTime-now) * time.Second 169 | remaining := res.Rate.Remaining 170 | if timeToReset < 0 || timeToReset > 1*time.Hour { 171 | // If this happens, the system clock is probably wrong, so we 172 | // assume we are at the beginning of the window and consider 173 | // only total requests per hour. 
174 | timeToReset = 1 * time.Hour 175 | remaining = res.Rate.Limit 176 | } 177 | 178 | return timeToReset / time.Duration(remaining+1) 179 | } 180 | -------------------------------------------------------------------------------- /downloader/download.go: -------------------------------------------------------------------------------- 1 | package downloader 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "path/filepath" 7 | "time" 8 | 9 | "github.com/src-d/gitcollector/library" 10 | "github.com/src-d/gitcollector/updater" 11 | 12 | "github.com/src-d/go-borges" 13 | "github.com/src-d/go-borges/siva" 14 | "gopkg.in/src-d/go-billy.v4" 15 | "gopkg.in/src-d/go-billy.v4/util" 16 | "gopkg.in/src-d/go-errors.v1" 17 | "gopkg.in/src-d/go-log.v1" 18 | ) 19 | 20 | var ( 21 | // ErrNotDownloadJob is returned when a not download job is found. 22 | ErrNotDownloadJob = errors.NewKind("not download job") 23 | 24 | // ErrRepoAlreadyExists is returned if there is an attempt to 25 | // retrieve an already downloaded git repository. 26 | ErrRepoAlreadyExists = errors.NewKind("%s already downloaded") 27 | ) 28 | 29 | // Download is a library.JobFn function to download a git repository and store 30 | // it in a borges.Library. 
31 | func Download(ctx context.Context, job *library.Job) error { 32 | logger := job.Logger.New(log.Fields{"job": "download", "id": job.ID}) 33 | if job.Type != library.JobDownload || 34 | len(job.Endpoints()) == 0 || 35 | job.Lib == nil || 36 | job.TempFS == nil { 37 | err := ErrNotDownloadJob.New() 38 | logger.Errorf(err, "wrong job") 39 | return err 40 | } 41 | 42 | lib, ok := (job.Lib).(*siva.Library) 43 | if !ok { 44 | err := library.ErrNotSivaLibrary.New() 45 | logger.Errorf(err, "wrong library") 46 | return err 47 | } 48 | 49 | endpoint := job.Endpoints()[0] 50 | logger = logger.New(log.Fields{"url": endpoint}) 51 | 52 | repoID, err := library.NewRepositoryID(endpoint) 53 | if err != nil { 54 | logger.Errorf(err, "wrong repository endpoint %s", endpoint) 55 | return err 56 | } 57 | 58 | ok, locID, err := libHas(ctx, lib, repoID) 59 | if err != nil { 60 | logger.Errorf(err, "failed") 61 | return err 62 | } 63 | 64 | if ok { 65 | if job.AllowUpdate { 66 | job.Type = library.JobUpdate 67 | job.LocationID = locID 68 | return updater.Update(ctx, job) 69 | } 70 | 71 | err := ErrRepoAlreadyExists.New(repoID) 72 | logger.Infof(err.Error()) 73 | return err 74 | } 75 | 76 | logger.Infof("started") 77 | start := time.Now() 78 | if err := downloadRepository( 79 | ctx, 80 | logger, 81 | lib, 82 | job.TempFS, 83 | repoID, 84 | endpoint, 85 | job.AuthToken, 86 | ); err != nil { 87 | logger.Errorf(err, "failed") 88 | return err 89 | } 90 | 91 | elapsed := time.Since(start).String() 92 | logger.With(log.Fields{"elapsed": elapsed}).Infof("finished") 93 | return nil 94 | } 95 | 96 | func libHas( 97 | ctx context.Context, 98 | lib borges.Library, 99 | id borges.RepositoryID, 100 | ) (bool, borges.LocationID, error) { 101 | var ( 102 | ok bool 103 | locID borges.LocationID 104 | err error 105 | done = make(chan struct{}) 106 | ) 107 | 108 | go func() { 109 | ok, _, locID, err = lib.Has(id) 110 | close(done) 111 | }() 112 | 113 | select { 114 | case <-done: 115 | case 
<-ctx.Done(): 116 | return false, "", ctx.Err() 117 | } 118 | 119 | return ok, locID, err 120 | } 121 | 122 | func downloadRepository( 123 | ctx context.Context, 124 | logger log.Logger, 125 | lib *siva.Library, 126 | tmp billy.Filesystem, 127 | id borges.RepositoryID, 128 | endpoint string, 129 | authToken library.AuthTokenFn, 130 | ) error { 131 | clonePath := filepath.Join( 132 | cloneRootPath, 133 | fmt.Sprintf("%s_%d", id, time.Now().UnixNano()), 134 | ) 135 | 136 | token := authToken(endpoint) 137 | 138 | start := time.Now() 139 | repo, err := CloneRepository( 140 | ctx, tmp, clonePath, endpoint, id.String(), token, 141 | ) 142 | 143 | if err != nil { 144 | return err 145 | } 146 | 147 | elapsed := time.Since(start).String() 148 | logger.With(log.Fields{"elapsed": elapsed}).Debugf("cloned") 149 | 150 | defer func() { 151 | if err := util.RemoveAll(tmp, clonePath); err != nil { 152 | logger.Warningf("couldn't remove %s", clonePath) 153 | } 154 | }() 155 | 156 | start = time.Now() 157 | root, err := RootCommit(repo, id.String()) 158 | if err != nil { 159 | return err 160 | } 161 | 162 | elapsed = time.Since(start).String() 163 | logger.With(log.Fields{ 164 | "elapsed": elapsed, 165 | "root": root.Hash.String(), 166 | }).Debugf("root commit found") 167 | 168 | start = time.Now() 169 | locID := borges.LocationID(root.Hash.String()) 170 | r, err := PrepareRepository( 171 | ctx, lib, locID, id, endpoint, tmp, clonePath, 172 | ) 173 | 174 | if err != nil { 175 | return err 176 | } 177 | 178 | elapsed = time.Since(start).String() 179 | logger.With(log.Fields{ 180 | "elapsed": elapsed, 181 | }).Debugf("rooted repository ready") 182 | 183 | start = time.Now() 184 | if err := FetchChanges(ctx, r, id.String(), token); err != nil { 185 | return err 186 | } 187 | 188 | elapsed = time.Since(start).String() 189 | logger.With(log.Fields{"elapsed": elapsed}).Debugf("fetched") 190 | 191 | start = time.Now() 192 | if err := r.Commit(); err != nil { 193 | return err 194 | } 195 | 
196 | elapsed = time.Since(start).String() 197 | logger.With(log.Fields{"elapsed": elapsed}).Debugf("commited") 198 | return nil 199 | } 200 | -------------------------------------------------------------------------------- /updater/update.go: -------------------------------------------------------------------------------- 1 | package updater 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/src-d/gitcollector/library" 8 | "github.com/src-d/go-borges" 9 | "github.com/src-d/go-borges/siva" 10 | "gopkg.in/src-d/go-errors.v1" 11 | "gopkg.in/src-d/go-git.v4" 12 | "gopkg.in/src-d/go-git.v4/plumbing/transport/http" 13 | "gopkg.in/src-d/go-log.v1" 14 | ) 15 | 16 | var ( 17 | // ErrNotUpdateJob is returned when a not update job is found. 18 | ErrNotUpdateJob = errors.NewKind("not update job") 19 | ) 20 | 21 | // Update is a library.JobFn function to update a git repository alreayd stored 22 | // in a borges.Library. 23 | func Update(ctx context.Context, job *library.Job) error { 24 | logger := job.Logger.New(log.Fields{"job": "update", "id": job.ID}) 25 | if job.Type != library.JobUpdate { 26 | err := ErrNotUpdateJob.New() 27 | logger.Errorf(err, "wrong job") 28 | return err 29 | } 30 | 31 | lib, ok := (job.Lib).(*siva.Library) 32 | if !ok { 33 | err := library.ErrNotSivaLibrary.New() 34 | logger.Errorf(err, "wrong library") 35 | return err 36 | } 37 | 38 | logger = logger.New(log.Fields{"location": job.LocationID}) 39 | location, err := lib.Location(job.LocationID) 40 | if err != nil { 41 | logger.Errorf(err, "failed") 42 | return err 43 | } 44 | 45 | loc, ok := location.(*siva.Location) 46 | if !ok { 47 | err := library.ErrNotSivaLocation.New() 48 | logger.Errorf(err, "wrong location") 49 | return err 50 | } 51 | 52 | repo, err := loc.Get("", borges.RWMode) 53 | if err != nil { 54 | logger.Errorf(err, "couldn't get repository") 55 | return err 56 | } 57 | 58 | var remote string 59 | if len(job.Endpoints()) == 1 { 60 | // job redirected from download 61 | ep 
:= job.Endpoints()[0] 62 | 63 | logger = logger.New(log.Fields{"url": ep}) 64 | 65 | id, err := library.NewRepositoryID(ep) 66 | if err != nil { 67 | logger.Errorf(err, "wrong repository endpoint") 68 | return err 69 | } 70 | 71 | remote = id.String() 72 | } 73 | 74 | remotes, err := remotesToUpdate(repo, remote) 75 | if err != nil { 76 | logger.Errorf(err, "couldn't get remotes") 77 | return err 78 | } 79 | 80 | if len(job.Endpoints()) == 0 { 81 | // it will update the whole location, add all the endpoints 82 | // to be updated to the job 83 | var endpoints []string 84 | for _, remote := range remotes { 85 | endpoints = append(endpoints, remote.Config().URLs[0]) 86 | } 87 | 88 | job.SetEndpoints(endpoints) 89 | } 90 | 91 | logger.Infof("started") 92 | start := time.Now() 93 | if err := updateRepository( 94 | ctx, 95 | logger, 96 | repo, 97 | remotes, 98 | job.AuthToken, 99 | ); err != nil { 100 | logger.Errorf(err, "failed") 101 | return err 102 | } 103 | 104 | elapsed := time.Since(start).String() 105 | logger.With(log.Fields{"elapsed": elapsed}).Infof("finished") 106 | return nil 107 | } 108 | 109 | func remotesToUpdate( 110 | repo borges.Repository, 111 | remote string, 112 | ) ([]*git.Remote, error) { 113 | var ( 114 | remotes []*git.Remote 115 | err error 116 | ) 117 | 118 | if remote == "" { 119 | remotes, err = repo.R().Remotes() 120 | if err != nil { 121 | return nil, err 122 | } 123 | } else { 124 | r, err := repo.R().Remote(remote) 125 | if err != nil { 126 | return nil, err 127 | } 128 | 129 | remotes = append(remotes, r) 130 | } 131 | 132 | return remotes, nil 133 | } 134 | 135 | func updateRepository( 136 | ctx context.Context, 137 | logger log.Logger, 138 | repo borges.Repository, 139 | remotes []*git.Remote, 140 | authToken library.AuthTokenFn, 141 | ) error { 142 | var alreadyUpdated int 143 | start := time.Now() 144 | for _, remote := range remotes { 145 | opts := &git.FetchOptions{} 146 | urls := remote.Config().URLs 147 | if len(urls) > 0 { 148 
| token := authToken(urls[0]) 149 | if token != "" { 150 | opts.Auth = &http.BasicAuth{ 151 | Username: "gitcollector", 152 | Password: token, 153 | } 154 | } 155 | } 156 | 157 | err := remote.FetchContext(ctx, opts) 158 | if err != nil && err != git.NoErrAlreadyUpToDate { 159 | if err := repo.Close(); err != nil { 160 | logger.Warningf("couldn't close repository") 161 | } 162 | 163 | return err 164 | } 165 | 166 | name := remote.Config().Name 167 | if err == git.NoErrAlreadyUpToDate { 168 | alreadyUpdated++ 169 | logger.With(log.Fields{"remote": name}). 170 | Debugf("already up to date") 171 | } 172 | 173 | if err == nil { 174 | logger.With(log.Fields{"remote": name}). 175 | Debugf("updated") 176 | } 177 | } 178 | 179 | if len(remotes) == alreadyUpdated { 180 | elapsed := time.Since(start).String() 181 | logger.With(log.Fields{"elapsed": elapsed}). 182 | Debugf("location already up to date") 183 | return repo.Close() 184 | } 185 | 186 | elapsed := time.Since(start).String() 187 | logger.With(log.Fields{"elapsed": elapsed}).Debugf("fetched") 188 | 189 | start = time.Now() 190 | if err := repo.Commit(); err != nil { 191 | return err 192 | } 193 | 194 | elapsed = time.Since(start).String() 195 | logger.With(log.Fields{"elapsed": elapsed}).Debugf("commited") 196 | return nil 197 | } 198 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gitcollector [![GitHub version](https://badge.fury.io/gh/src-d%2Fgitcollector.svg)](https://github.com/src-d/gitcollector/releases) [![Build Status](https://travis-ci.com/src-d/gitcollector.svg?branch=master)](https://travis-ci.com/src-d/gitcollector) [![codecov](https://codecov.io/gh/src-d/gitcollector/branch/master/graph/badge.svg)](https://codecov.io/gh/src-d/gitcollector) [![GoDoc](https://godoc.org/gopkg.in/src-d/gitcollector.v0?status.svg)](https://godoc.org/gopkg.in/src-d/gitcollector.v0) [![Go 
Report Card](https://goreportcard.com/badge/github.com/src-d/gitcollector)](https://goreportcard.com/report/github.com/src-d/gitcollector) 2 | 3 | **gitcollector** collects and stores git repositories. 4 | 5 | gitcollector is the source{d} tool to download and update git repositories at 6 | large scale. To that end, it uses a custom repository storage 7 | [file format](https://blog.sourced.tech/post/siva/) called [siva](https://github.com/src-d/go-siva) optimized for saving 8 | storage space and keeping repositories up-to-date. 9 | 10 | ## Status 11 | 12 | The project is in a preliminary stable stage and under active development. 13 | 14 | ## Storing repositories using rooted repositories 15 | 16 | A rooted repository is a [bare Git repository](http://www.saintsjd.com/2011/01/what-is-a-bare-git-repository/) that stores all objects from all repositories that share a common history, that is, they have the same initial commit. It is stored using the [Siva](https://github.com/src-d/go-siva) file format. 17 | 18 | ![Root Repository explanatory diagram](https://user-images.githubusercontent.com/5582506/30617179-2aba194a-9d95-11e7-8fd5-0a87c2a595f9.png) 19 | 20 | Rooted repositories have a few particularities that you should know to work with them effectively: 21 | 22 | - They have no `HEAD` reference. 23 | - All references are of the following form: `{REFERENCE_NAME}/{REMOTE_NAME}`. For example, the reference `refs/heads/master` of the remote `foo` would be `/refs/heads/master/foo`. 24 | - Each remote represents a repository that shares the common history of the rooted repository. A remote can have multiple endpoints. 25 | - A rooted repository is simply a repository with all the objects from all the repositories which share the same root commit. 26 | - The root commit for a repository is obtained following the first parent of each commit from HEAD. 
27 | 28 | ## Getting started 29 | 30 | ### Plain command 31 | 32 | gitcollector entry point usage is done through the subcommand `download` (at this time is the only subcommand): 33 | 34 | ```txt 35 | Usage: 36 | gitcollector [OPTIONS] download [download-OPTIONS] 37 | 38 | Help Options: 39 | -h, --help Show this help message 40 | 41 | [download command options] 42 | --library= path where download to [$GITCOLLECTOR_LIBRARY] 43 | --bucket= library bucketization level (default: 2) [$GITCOLLECTOR_LIBRARY_BUCKET] 44 | --tmp= directory to place generated temporal files (default: /tmp) [$GITCOLLECTOR_TMP] 45 | --workers= number of workers, default to GOMAXPROCS [$GITCOLLECTOR_WORKERS] 46 | --half-cpu set the number of workers to half of the set workers [$GITCOLLECTOR_HALF_CPU] 47 | --no-updates don't allow updates on already downloaded repositories [$GITCOLLECTOR_NO_UPDATES] 48 | --no-forks github forked repositories will not be downloaded [$GITCOLLECTOR_NO_FORKS] 49 | --orgs= list of github organization names separated by comma [$GITHUB_ORGANIZATIONS] 50 | --excluded-repos= list of repos to exclude separated by comma [$GITCOLLECTOR_EXCLUDED_REPOS] 51 | --token= github token [$GITHUB_TOKEN] 52 | --metrics-db= uri to a database where metrics will be sent [$GITCOLLECTOR_METRICS_DB_URI] 53 | --metrics-db-table= table name where the metrics will be added (default: gitcollector_metrics) [$GITCOLLECTOR_METRICS_DB_TABLE] 54 | --metrics-sync-timeout= timeout in seconds to send metrics (default: 30) [$GITCOLLECTOR_METRICS_SYNC] 55 | 56 | Log Options: 57 | --log-level=[info|debug|warning|error] Logging level (default: info) [$LOG_LEVEL] 58 | --log-format=[text|json] log format, defaults to text on a terminal and json otherwise [$LOG_FORMAT] 59 | --log-fields= default fields for the logger, specified in json [$LOG_FIELDS] 60 | --log-force-format ignore if it is running on a terminal or not [$LOG_FORCE_FORMAT] 61 | ``` 62 | 63 | Usage example, `--library` and `--orgs` are always 
required: 64 | 65 | > gitcollector download --library=/path/to/repos/directoy --orgs=src-d 66 | 67 | To collect repositories from several github organizations: 68 | 69 | > gitcollector download --library=/path/to/repos/directoy --orgs=src-d,bblfsh 70 | 71 | Note that all the download command options are also configurable with environment variables. 72 | 73 | ### Docker 74 | 75 | gitcollector upload a new docker image to [docker hub](https://hub.docker.com/r/srcd/gitcollector/tags) on each new release. To use it: 76 | 77 | ``` sh 78 | docker run --rm --name gitcollector_1 \ 79 | -e "GITHUB_ORGANIZATIONS=src-d,bblfsh" \ 80 | -e "GITHUB_TOKEN=foo" \ 81 | -v /path/to/repos/directory:/library \ 82 | srcd/gitcollector:latest 83 | ``` 84 | 85 | Note that you must mount a local directory into the specific container path shown in `-v /path/to/repos/directory:/library`. This directory is where the repositories will be downloaded into rooted repositories in siva files format. 86 | 87 | ## License 88 | 89 | GPL v3.0, see [LICENSE](LICENSE) 90 | -------------------------------------------------------------------------------- /metrics/metrics_test.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strconv" 7 | "strings" 8 | "testing" 9 | "time" 10 | 11 | "github.com/src-d/gitcollector/library" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestMetricsCollectorBatch(t *testing.T) { 16 | var discover, download, update, fail, total uint64 17 | mc := NewCollector(&CollectorOpts{ 18 | BatchSize: 10, 19 | SyncTime: 1 * time.Hour, 20 | Send: func( 21 | ctx context.Context, 22 | mc *Collector, 23 | _ *library.Job, 24 | ) error { 25 | discover = mc.discoverCount 26 | download = mc.successDownloadCount 27 | update = mc.successUpdateCount 28 | fail = mc.failCount 29 | 30 | next := discover + download + update + fail 31 | require.True(t, next > total) 32 | require.True(t, 
next-total >= 10) 33 | total = next 34 | return nil 35 | }, 36 | }) 37 | 38 | go mc.Start() 39 | 40 | var countOne, countThree int 41 | for i := 0; i < 1000; i++ { 42 | job := &library.Job{} 43 | job.SetEndpoints([]string{ 44 | fmt.Sprintf("ep-%d-1", i), 45 | fmt.Sprintf("ep-%d-2", i), 46 | fmt.Sprintf("ep-%d-3", i), 47 | }) 48 | 49 | switch i % 5 { 50 | case 0: 51 | job.Type = library.JobDownload 52 | mc.Success(job) 53 | countOne++ 54 | case 1: 55 | job.Type = library.JobDownload 56 | mc.Discover(job) 57 | countOne++ 58 | case 2: 59 | job.Type = library.JobDownload 60 | mc.Fail(job) 61 | countThree++ 62 | case 3: 63 | job.Type = library.JobUpdate 64 | mc.Success(job) 65 | countThree++ 66 | case 4: 67 | job.Type = library.JobUpdate 68 | mc.Fail(job) 69 | countThree++ 70 | } 71 | } 72 | 73 | mc.Stop(false) 74 | expected := uint64(countOne + countThree*3) 75 | require.Equal(t, expected, total) 76 | } 77 | 78 | func TestMetricsCollectorTime(t *testing.T) { 79 | var count int 80 | mc := NewCollector(&CollectorOpts{ 81 | BatchSize: 1000000, 82 | SyncTime: 1 * time.Second, 83 | Send: func( 84 | ctx context.Context, 85 | mc *Collector, 86 | _ *library.Job, 87 | ) error { 88 | count++ 89 | return nil 90 | }, 91 | }) 92 | 93 | go mc.Start() 94 | 95 | func() { 96 | done := time.After(3 * time.Second) 97 | for { 98 | select { 99 | case <-done: 100 | return 101 | default: 102 | job := &library.Job{ 103 | Type: library.JobDownload, 104 | } 105 | job.SetEndpoints([]string{"foo-ep"}) 106 | 107 | mc.Success(job) 108 | time.Sleep(100 * time.Millisecond) 109 | } 110 | } 111 | }() 112 | 113 | mc.Stop(false) 114 | require.Equal(t, 3, count) 115 | } 116 | 117 | func TestMetricsCollectorByOrg(t *testing.T) { 118 | mc := NewCollectorByOrg(map[string]*Collector{ 119 | "org1": NewCollector(&CollectorOpts{}), 120 | "org2": NewCollector(&CollectorOpts{}), 121 | "org3": NewCollector(&CollectorOpts{}), 122 | }) 123 | 124 | go mc.Start() 125 | 126 | orgs := []string{"org1", "org2", "org3"} 127 
| const url = "https://github.com/%s/foo-%d" 128 | for i := 0; i < 999; i++ { 129 | ep := fmt.Sprintf(url, orgs[i%len(orgs)], i) 130 | job := &library.Job{} 131 | job.SetEndpoints([]string{ep}) 132 | 133 | switch i % 5 { 134 | case 0: 135 | job.Type = library.JobDownload 136 | mc.Success(job) 137 | case 1: 138 | job.Type = library.JobDownload 139 | mc.Discover(job) 140 | case 2: 141 | job.Type = library.JobDownload 142 | mc.Fail(job) 143 | case 3: 144 | job.Type = library.JobUpdate 145 | mc.Success(job) 146 | case 4: 147 | job.Type = library.JobUpdate 148 | mc.Fail(job) 149 | } 150 | } 151 | 152 | mc.Stop(false) 153 | var total uint64 154 | for _, m := range mc.orgMetrics { 155 | subTotal := m.discoverCount + m.successDownloadCount + 156 | m.successUpdateCount + m.failCount 157 | 158 | require.Equal(t, uint64(333), subTotal) 159 | total += subTotal 160 | } 161 | 162 | require.Equal(t, uint64(999), total) 163 | } 164 | 165 | type closeDelayCase struct { 166 | name string 167 | immediate bool 168 | syncTime time.Duration 169 | delay time.Duration 170 | expCounter int 171 | } 172 | 173 | func TestClosesWithDelay(t *testing.T) { 174 | for _, c := range []closeDelayCase{ 175 | {"ImmediateWithoutDelay", true, time.Second, 0, 0}, 176 | {"ImmediateWithDelay", true, 500 * time.Millisecond, time.Second, 1}, 177 | {"NonImmediateWithoutDelay", false, time.Second, 0, 1}, 178 | {"NonImmediateWithDelay", false, 500 * time.Millisecond, time.Second, 2}, 179 | } { 180 | c := c 181 | t.Run(c.name, func(t *testing.T) { 182 | testCloseDelayCollector(t, c) 183 | }) 184 | } 185 | } 186 | 187 | func testCloseDelayCollector(t *testing.T, c closeDelayCase) { 188 | var counter int 189 | mc := NewCollector(&CollectorOpts{ 190 | SyncTime: c.syncTime, 191 | Send: func( 192 | ctx context.Context, 193 | mc *Collector, 194 | _ *library.Job, 195 | ) error { 196 | counter++ 197 | return nil 198 | }, 199 | }) 200 | 201 | go mc.Start() 202 | time.Sleep(c.delay) 203 | 204 | 
mc.Success(getJob(library.JobDownload)) 205 | mc.Success(getJob(library.JobUpdate)) 206 | mc.Stop(c.immediate) 207 | 208 | require.Equal(t, c.expCounter, counter) 209 | } 210 | 211 | func TestFailedSend(t *testing.T) { 212 | for _, immediate := range []bool{false, true} { 213 | t.Run("TestFailedSendImmediate"+strings.Title(strconv.FormatBool(immediate)), 214 | func(t *testing.T) { 215 | testFailedSend(t, immediate) 216 | }) 217 | } 218 | } 219 | 220 | func testFailedSend(t *testing.T, stopImmediate bool) { 221 | mc := NewCollector(&CollectorOpts{ 222 | SyncTime: time.Second, 223 | Send: func( 224 | ctx context.Context, 225 | mc *Collector, 226 | _ *library.Job, 227 | ) error { 228 | return fmt.Errorf("mocked") 229 | }, 230 | }) 231 | 232 | go mc.Start() 233 | 234 | mc.Success(getJob(library.JobDownload)) 235 | mc.Success(getJob(library.JobUpdate)) 236 | mc.Stop(stopImmediate) 237 | 238 | // TODO maybe we can stabilize it? 239 | if stopImmediate { 240 | require.LessOrEqual(t, mc.successDownloadCount, uint64(1)) 241 | require.LessOrEqual(t, mc.successUpdateCount, uint64(1)) 242 | } else { 243 | require.Equal(t, uint64(1), mc.successDownloadCount) 244 | require.Equal(t, uint64(1), mc.successUpdateCount) 245 | } 246 | } 247 | 248 | func getJob(jobType int) *library.Job { 249 | job := &library.Job{ 250 | Type: library.JobType(jobType), 251 | } 252 | job.SetEndpoints([]string{"ep"}) 253 | return job 254 | } 255 | -------------------------------------------------------------------------------- /discovery/github.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/google/go-github/v28/github" 8 | "github.com/jpillora/backoff" 9 | "gopkg.in/src-d/go-errors.v1" 10 | ) 11 | 12 | var ( 13 | // ErrEndpointsNotFound is the returned error when couldn't find 14 | // endpoints for a certain repository. 
15 | ErrEndpointsNotFound = errors.NewKind("endpoinds not found for %s") 16 | 17 | // ErrNewRepositoriesNotFound is returned when there aren't new 18 | // repositories in the organization. 19 | ErrNewRepositoriesNotFound = errors.NewKind( 20 | "couldn't find new repositories") 21 | 22 | // ErrRateLimitExceeded is returned when the api rate limit is reached. 23 | ErrRateLimitExceeded = errors.NewKind("rate limit requests exceeded") 24 | 25 | // ErrDiscoveryStopped is returned when a discovery has been stopped. 26 | ErrDiscoveryStopped = errors.NewKind("discovery stopped") 27 | 28 | // ErrDiscoveryStop is returned when a discovery fails on Stop. 29 | ErrDiscoveryStop = errors.NewKind("discovery failed on stop") 30 | 31 | // ErrAdvertiseTimeout is returned when an advertise functions exceeds 32 | // the timeout. 33 | ErrAdvertiseTimeout = errors.NewKind("advertise repositories timeout") 34 | ) 35 | 36 | // AdvertiseGHRepositoriesFn is used by a GitHub to notify that a new 37 | // repository has been discovered. 38 | type AdvertiseGHRepositoriesFn func(context.Context, []*github.Repository) error 39 | 40 | // GitHubOpts represents configuration options for a GitHub discovery. 41 | type GitHubOpts struct { 42 | AdvertiseTimeout time.Duration 43 | SkipForks bool 44 | WaitNewRepos bool 45 | WaitOnRateLimit bool 46 | StopTimeout time.Duration 47 | MaxJobBuffer int 48 | BatchSize int 49 | } 50 | 51 | // GitHub will retrieve the information for all the repositories for the 52 | // given GHRepositoriesIterator. 53 | type GitHub struct { 54 | advertiseRepos AdvertiseGHRepositoriesFn 55 | iter GHRepositoriesIter 56 | batch []*github.Repository 57 | retryJobs []*github.Repository 58 | cancel chan struct{} 59 | backoff *backoff.Backoff 60 | opts *GitHubOpts 61 | } 62 | 63 | const ( 64 | stopTimeout = 10 * time.Second 65 | batchSize = 1 66 | ) 67 | 68 | // NewGitHub builds a new GitHub. 
69 | func NewGitHub( 70 | advertiseRepos AdvertiseGHRepositoriesFn, 71 | iter GHRepositoriesIter, 72 | opts *GitHubOpts, 73 | ) *GitHub { 74 | if opts == nil { 75 | opts = &GitHubOpts{} 76 | } 77 | 78 | if opts.StopTimeout <= 0 { 79 | opts.StopTimeout = stopTimeout 80 | } 81 | 82 | if opts.BatchSize <= 0 { 83 | opts.BatchSize = batchSize 84 | } 85 | 86 | if opts.MaxJobBuffer <= 0 { 87 | opts.MaxJobBuffer = opts.BatchSize * 2 88 | } 89 | 90 | if opts.AdvertiseTimeout <= 0 { 91 | to := time.Duration(5*opts.BatchSize) * time.Second 92 | opts.AdvertiseTimeout = to 93 | } 94 | 95 | if advertiseRepos == nil { 96 | advertiseRepos = func( 97 | context.Context, 98 | []*github.Repository, 99 | ) error { 100 | return nil 101 | } 102 | } 103 | 104 | return &GitHub{ 105 | advertiseRepos: advertiseRepos, 106 | iter: iter, 107 | batch: make([]*github.Repository, 0, opts.BatchSize), 108 | retryJobs: make([]*github.Repository, 0, opts.MaxJobBuffer), 109 | cancel: make(chan struct{}), 110 | backoff: newBackoff(), 111 | opts: opts, 112 | } 113 | } 114 | 115 | func newBackoff() *backoff.Backoff { 116 | const ( 117 | minDuration = 500 * time.Millisecond 118 | maxDuration = 5 * time.Second 119 | factor = 4 120 | ) 121 | 122 | return &backoff.Backoff{ 123 | Min: minDuration, 124 | Max: maxDuration, 125 | Factor: factor, 126 | Jitter: true, 127 | } 128 | } 129 | 130 | // Start starts the GitHub. 
131 | func (p *GitHub) Start() error { 132 | ctx, cancel := context.WithCancel(context.Background()) 133 | defer cancel() 134 | 135 | for { 136 | var err error 137 | defer func() { 138 | if ErrDiscoveryStopped.Is(err) && len(p.batch) > 0 { 139 | if de := p.sendBatch(ctx); err != nil { 140 | err = de 141 | } 142 | } 143 | }() 144 | 145 | done := make(chan struct{}) 146 | go func() { 147 | err = p.discoverRepositories(ctx) 148 | close(done) 149 | }() 150 | 151 | select { 152 | case <-done: 153 | if err != nil { 154 | return err 155 | } 156 | case <-p.cancel: 157 | return ErrDiscoveryStopped.New() 158 | } 159 | } 160 | } 161 | 162 | func (p *GitHub) discoverRepositories(ctx context.Context) error { 163 | if len(p.retryJobs) > 0 { 164 | job := p.retryJobs[0] 165 | p.retryJobs = p.retryJobs[1:] 166 | p.batch = append(p.batch, job) 167 | } else { 168 | repo, retry, err := p.iter.Next(ctx) 169 | if err != nil { 170 | if ErrNewRepositoriesNotFound.Is(err) && 171 | !p.opts.WaitNewRepos { 172 | return ErrDiscoveryStopped.Wrap(err) 173 | } 174 | 175 | if ErrRateLimitExceeded.Is(err) && 176 | !p.opts.WaitOnRateLimit { 177 | return ErrDiscoveryStopped.Wrap(err) 178 | } 179 | 180 | if retry <= 0 { 181 | return err 182 | } 183 | 184 | time.Sleep(retry) 185 | return nil 186 | } 187 | 188 | if p.opts.SkipForks && repo.GetFork() { 189 | return nil 190 | } 191 | 192 | p.batch = append(p.batch, repo) 193 | } 194 | 195 | if len(p.batch) < p.opts.BatchSize { 196 | return nil 197 | } 198 | 199 | ctxto, cancel := context.WithTimeout(ctx, p.opts.AdvertiseTimeout) 200 | defer cancel() 201 | 202 | if err := p.sendBatch(ctxto); err != nil { 203 | if !ErrAdvertiseTimeout.Is(err) { 204 | return err 205 | } 206 | 207 | time.Sleep(p.backoff.Duration()) 208 | } else { 209 | p.backoff.Reset() 210 | } 211 | 212 | return nil 213 | } 214 | 215 | func (p *GitHub) sendBatch(ctx context.Context) error { 216 | if err := p.advertiseRepos(ctx, p.batch); err != nil { 217 | return err 218 | } 219 | 220 | 
p.batch = make([]*github.Repository, 0, p.opts.BatchSize) 221 | return nil 222 | } 223 | 224 | // GetGHEndpoint gets the enpoint for a github repository. 225 | func GetGHEndpoint(r *github.Repository) (string, error) { 226 | var endpoint string 227 | getURLs := []func() string{ 228 | r.GetHTMLURL, 229 | r.GetGitURL, 230 | r.GetSSHURL, 231 | } 232 | 233 | for _, getURL := range getURLs { 234 | ep := getURL() 235 | if ep != "" { 236 | endpoint = ep 237 | break 238 | } 239 | } 240 | 241 | if endpoint == "" { 242 | return "", ErrEndpointsNotFound.New(r.GetFullName()) 243 | } 244 | 245 | return endpoint, nil 246 | } 247 | 248 | // Stop stops the GitHub. 249 | func (p *GitHub) Stop() error { 250 | select { 251 | case p.cancel <- struct{}{}: 252 | return nil 253 | case <-time.After(p.opts.StopTimeout): 254 | return ErrDiscoveryStop.New() 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /library/job.go: -------------------------------------------------------------------------------- 1 | package library 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | 7 | "github.com/src-d/gitcollector" 8 | "github.com/src-d/go-borges" 9 | "gopkg.in/src-d/go-billy.v4" 10 | "gopkg.in/src-d/go-errors.v1" 11 | "gopkg.in/src-d/go-log.v1" 12 | 13 | "github.com/google/uuid" 14 | ) 15 | 16 | var ( 17 | // ErrJobFnNotFound is returned when theres is no function to 18 | // process a job. 19 | ErrJobFnNotFound = errors.NewKind( 20 | "process function not found for library.Job") 21 | ) 22 | 23 | // JobType represents the type of the Job. 24 | type JobType uint8 25 | 26 | const ( 27 | // JobDownload represents a Download Job. 28 | JobDownload = 1 << iota 29 | // JobUpdate represents an Update Job. 30 | JobUpdate 31 | ) 32 | 33 | // Job represents a gitcollector.Job to perform a task on a borges.Library. 
34 | type Job struct { 35 | mu sync.Mutex 36 | endpoints []string 37 | ID string 38 | Type JobType 39 | Lib borges.Library 40 | TempFS billy.Filesystem 41 | LocationID borges.LocationID 42 | AllowUpdate bool 43 | AuthToken AuthTokenFn 44 | ProcessFn JobFn 45 | Logger log.Logger 46 | } 47 | 48 | var _ gitcollector.Job = (*Job)(nil) 49 | 50 | // JobFn represents the task to be performed by a Job. 51 | type JobFn func(context.Context, *Job) error 52 | 53 | // TODO: we should probably secure other fiels 54 | func (j *Job) SetEndpoints(endpoints []string) { 55 | j.mu.Lock() 56 | defer j.mu.Unlock() 57 | 58 | j.endpoints = endpoints 59 | } 60 | 61 | func (j *Job) Endpoints() []string { 62 | j.mu.Lock() 63 | defer j.mu.Unlock() 64 | 65 | return j.endpoints 66 | } 67 | 68 | // Process implements the Job interface. 69 | func (j *Job) Process(ctx context.Context) error { 70 | if j.ProcessFn == nil { 71 | return ErrJobFnNotFound.New() 72 | } 73 | 74 | return j.ProcessFn(ctx, j) 75 | } 76 | 77 | // AuthTokenFn retrieve and authentication token if any for the given endpoint. 78 | type AuthTokenFn func(endpoint string) string 79 | 80 | func getAuthTokenByOrg(tokens map[string]string) AuthTokenFn { 81 | if tokens == nil { 82 | tokens = map[string]string{} 83 | } 84 | 85 | return func(endpoint string) string { 86 | org := GetOrgFromEndpoint(endpoint) 87 | return tokens[org] 88 | } 89 | } 90 | 91 | var ( 92 | errWrongJob = errors.NewKind("wrong job found") 93 | errNotJobID = errors.NewKind("couldn't assign an ID to a job") 94 | errClosedChan = errors.NewKind("channel closed") 95 | ) 96 | 97 | // NewDownloadJobScheduleFn builds a new gitcollector.ScheduleFn that only 98 | // schedules download jobs. 
func NewDownloadJobScheduleFn(
	lib borges.Library,
	download chan gitcollector.Job,
	downloadFn JobFn,
	updateOnDownload bool,
	authTokens map[string]string,
	jobLogger log.Logger,
	temp billy.Filesystem,
) gitcollector.JobScheduleFn {
	return func(ctx context.Context) (gitcollector.Job, error) {
		// Block until a job arrives on the download queue, the queue
		// is closed, or ctx expires.
		job, err := jobFrom(ctx, download)
		if err != nil {
			if errClosedChan.Is(err) {
				err = gitcollector.ErrJobSource.New()
			}

			return nil, err
		}

		// Fill in the dependencies the download job needs to run.
		job.Lib = lib
		job.TempFS = temp
		job.ProcessFn = downloadFn
		job.AllowUpdate = updateOnDownload
		job.AuthToken = getAuthTokenByOrg(authTokens)
		job.Logger = jobLogger
		return job, nil
	}
}

// NewUpdateJobScheduleFn builds a new gitcollector.ScheduleFn that only
// schedules update jobs.
func NewUpdateJobScheduleFn(
	lib borges.Library,
	update chan gitcollector.Job,
	updateFn JobFn,
	authTokens map[string]string,
	jobLogger log.Logger,
) gitcollector.JobScheduleFn {
	return func(ctx context.Context) (gitcollector.Job, error) {
		job, err := jobFrom(ctx, update)
		if err != nil {
			if errClosedChan.Is(err) {
				err = gitcollector.ErrJobSource.New()
			}

			return nil, err
		}

		// Fill in the dependencies the update job needs to run.
		job.Lib = lib
		job.ProcessFn = updateFn
		job.AuthToken = getAuthTokenByOrg(authTokens)
		job.Logger = jobLogger
		return job, nil
	}
}

// NewJobScheduleFn builds a new gitcollector.ScheduleFn that schedules download
// and update jobs in different queues.
func NewJobScheduleFn(
	lib borges.Library,
	download, update chan gitcollector.Job,
	downloadFn, updateFn JobFn,
	updateOnDownload bool,
	authTokens map[string]string,
	jobLogger log.Logger,
	temp billy.Filesystem,
) gitcollector.JobScheduleFn {
	// setupJob fills in the job dependencies according to its type.
	setupJob := func(job *Job) error {
		if job.Lib == nil {
			job.Lib = lib
		}

		switch job.Type {
		case JobDownload:
			job.TempFS = temp
			job.AllowUpdate = updateOnDownload
			job.ProcessFn = downloadFn
		case JobUpdate:
			job.ProcessFn = updateFn
		default:
			return errWrongJob.New()
		}

		job.AuthToken = getAuthTokenByOrg(authTokens)
		job.Logger = jobLogger
		return nil
	}

	return func(ctx context.Context) (gitcollector.Job, error) {
		if download == nil && update == nil {
			return nil, gitcollector.ErrJobSource.New()
		}

		var (
			job *Job
			err error
		)

		// Download jobs take precedence over update jobs.
		// NOTE(review): `len(download) > 0` is always false when
		// download is nil (len of a nil channel is 0), so this
		// condition reduces to `download != nil` — confirm intent.
		if download != nil || len(download) > 0 {
			job, err = jobFrom(ctx, download)
			if err != nil {
				if !(errClosedChan.Is(err) ||
					gitcollector.ErrNewJobsNotFound.Is(err)) {
					return nil, err
				}

				// A closed queue won't produce more jobs:
				// stop polling it in later calls.
				if errClosedChan.Is(err) {
					download = nil
				}
			}
		}

		if job != nil {
			if err := setupJob(job); err != nil {
				return nil, gitcollector.
					ErrNewJobsNotFound.New()
			}

			return job, nil
		}

		if update == nil && download == nil {
			return nil, gitcollector.ErrJobSource.New()
		}

		if update == nil {
			return nil, gitcollector.ErrNewJobsNotFound.New()
		}

		// Fall back to the update queue.
		job, err = jobFrom(ctx, update)
		if err != nil {
			if errClosedChan.Is(err) {
				update = nil
			}

			return nil, gitcollector.ErrNewJobsNotFound.New()
		}

		if err := setupJob(job); err != nil {
			return nil, gitcollector.ErrNewJobsNotFound.New()
		}

		return job, nil
	}
}

// jobFrom takes the next job from queue, assigns it a random UUID as ID and
// returns it. It fails if the queue is nil or closed, if the queued value is
// not a *Job, or if ctx expires before a job arrives.
func jobFrom(ctx context.Context, queue chan gitcollector.Job) (*Job, error) {
	if queue == nil {
		return nil, errClosedChan.New()
	}

	select {
	case j, ok := <-queue:
		if !ok {
			return nil, errClosedChan.New()
		}

		job, ok := j.(*Job)
		if !ok {
			return nil, errWrongJob.New()
		}

		id, err := uuid.NewRandom()
		if err != nil {
			return nil, errNotJobID.Wrap(err)
		}

		job.ID = id.String()
		return job, nil
	case <-ctx.Done():
		return nil, gitcollector.ErrNewJobsNotFound.New()
	}
}
--------------------------------------------------------------------------------
/cmd/gitcollector/subcmd/download.go:
--------------------------------------------------------------------------------
package subcmd

import (
	"fmt"
	"io/ioutil"
	"os"
	"runtime"
	"strings"
	"sync"
	"time"

	"github.com/src-d/gitcollector"
	"github.com/src-d/gitcollector/discovery"
	"github.com/src-d/gitcollector/downloader"
	"github.com/src-d/gitcollector/library"
	"github.com/src-d/gitcollector/metrics"
	"github.com/src-d/gitcollector/provider"
	"github.com/src-d/go-borges/siva"
	"gopkg.in/src-d/go-billy.v4/osfs"
	"gopkg.in/src-d/go-cli.v0"
	"gopkg.in/src-d/go-log.v1"
22 | ) 23 | 24 | // DownloadCmd is the gitcollector subcommand to download repositories. 25 | type DownloadCmd struct { 26 | cli.Command `name:"download" short-description:"download repositories from a github organization"` 27 | 28 | LibPath string `long:"library" description:"path where download to" env:"GITCOLLECTOR_LIBRARY" required:"true"` 29 | LibBucket int `long:"bucket" description:"library bucketization level" env:"GITCOLLECTOR_LIBRARY_BUCKET" default:"2"` 30 | TmpPath string `long:"tmp" description:"directory to place generated temporal files" default:"/tmp" env:"GITCOLLECTOR_TMP"` 31 | Workers int `long:"workers" description:"number of workers, default to GOMAXPROCS" env:"GITCOLLECTOR_WORKERS"` 32 | HalfCPU bool `long:"half-cpu" description:"set the number of workers to half of the set workers" env:"GITCOLLECTOR_HALF_CPU"` 33 | NotAllowUpdates bool `long:"no-updates" description:"don't allow updates on already downloaded repositories" env:"GITCOLLECTOR_NO_UPDATES"` 34 | NoForks bool `long:"no-forks" description:"github forked repositories will not be downloaded" env:"GITCOLLECTOR_NO_FORKS"` 35 | Orgs string `long:"orgs" env:"GITHUB_ORGANIZATIONS" description:"list of github organization names separated by comma" required:"true"` 36 | ExcludedRepos string `long:"excluded-repos" env:"GITCOLLECTOR_EXCLUDED_REPOS" description:"list of repos to exclude separated by comma" required:"false"` 37 | Token string `long:"token" env:"GITHUB_TOKEN" description:"github token"` 38 | MetricsDBURI string `long:"metrics-db" env:"GITCOLLECTOR_METRICS_DB_URI" description:"uri to a database where metrics will be sent"` 39 | MetricsDBTable string `long:"metrics-db-table" env:"GITCOLLECTOR_METRICS_DB_TABLE" default:"gitcollector_metrics" description:"table name where the metrics will be added"` 40 | MetricsSync int64 `long:"metrics-sync-timeout" env:"GITCOLLECTOR_METRICS_SYNC" default:"30" description:"timeout in seconds to send metrics"` 41 | } 42 | 43 | // Execute runs the 
command. 44 | func (c *DownloadCmd) Execute(args []string) error { 45 | start := time.Now() 46 | 47 | if c.Orgs == "" { 48 | log.Warningf("no organizations found, at least one " + 49 | "organization must be provided") 50 | 51 | return nil 52 | } 53 | 54 | o := strings.Split(c.Orgs, ",") 55 | orgs := make([]string, 0, len(o)) 56 | for _, org := range o { 57 | orgs = append(orgs, strings.ToLower(org)) 58 | } 59 | 60 | ers := strings.Split(c.ExcludedRepos, ",") 61 | excludedRepos := make([]string, 0, len(ers)) 62 | for _, er := range ers { 63 | excludedRepos = append(excludedRepos, er) 64 | } 65 | 66 | info, err := os.Stat(c.LibPath) 67 | if err != nil { 68 | log.Errorf(err, "wrong path to locate the library") 69 | return err 70 | } 71 | 72 | if !info.IsDir() { 73 | err := fmt.Errorf("%s isn't a directory", c.LibPath) 74 | log.Errorf(err, "wrong path to locate the library") 75 | return err 76 | } 77 | 78 | fs := osfs.New(c.LibPath) 79 | 80 | tmpPath, err := ioutil.TempDir( 81 | c.TmpPath, "gitcollector-downloader") 82 | if err != nil { 83 | log.Errorf(err, "unable to create temporal directory") 84 | return err 85 | } 86 | defer func() { 87 | if err := os.RemoveAll(tmpPath); err != nil { 88 | log.Warningf( 89 | "couldn't remove temporal directory %s: %s", 90 | tmpPath, err.Error(), 91 | ) 92 | } 93 | }() 94 | 95 | log.Debugf("temporal dir: %s", tmpPath) 96 | temp := osfs.New(tmpPath) 97 | 98 | lib, err := siva.NewLibrary("test", fs, &siva.LibraryOptions{ 99 | Bucket: 2, 100 | Transactional: true, 101 | TempFS: temp, 102 | }) 103 | if err != nil { 104 | log.Errorf(err, "unable to create borges siva library") 105 | return err 106 | } 107 | 108 | authTokens := map[string]string{} 109 | if c.Token != "" { 110 | log.Debugf("acces token found") 111 | for _, org := range orgs { 112 | authTokens[org] = c.Token 113 | } 114 | } 115 | 116 | workers := c.Workers 117 | if workers == 0 { 118 | workers = runtime.GOMAXPROCS(-1) 119 | } 120 | 121 | if c.HalfCPU && workers > 1 { 122 | 
workers = workers / 2 123 | } 124 | 125 | updateOnDownload := !c.NotAllowUpdates 126 | log.Debugf("allow updates on downloads: %v", updateOnDownload) 127 | 128 | download := make(chan gitcollector.Job, 100) 129 | 130 | schedule := library.NewDownloadJobScheduleFn( 131 | lib, 132 | download, 133 | downloader.Download, 134 | updateOnDownload, 135 | authTokens, 136 | log.New(nil), 137 | temp, 138 | ) 139 | 140 | var mc gitcollector.MetricsCollector 141 | if c.MetricsDBURI != "" { 142 | mc, err = setupMetrics( 143 | c.MetricsDBURI, 144 | c.MetricsDBTable, 145 | orgs, 146 | c.MetricsSync, 147 | ) 148 | if err != nil { 149 | log.Errorf(err, "failed to setup metrics") 150 | return err 151 | } 152 | 153 | log.Debugf("metrics collection activated: sync timeout %d", 154 | c.MetricsSync) 155 | } 156 | 157 | wp := gitcollector.NewWorkerPool( 158 | schedule, 159 | &gitcollector.WorkerPoolOpts{ 160 | Metrics: mc, 161 | }, 162 | ) 163 | 164 | wp.SetWorkers(workers) 165 | log.Debugf("number of workers in the pool %d", wp.Size()) 166 | 167 | wp.Run() 168 | log.Debugf("worker pool is running") 169 | 170 | go runGHOrgProviders(log.New(nil), orgs, excludedRepos, c.Token, download, c.NoForks) 171 | 172 | wp.Wait() 173 | log.Debugf("worker pool stopped successfully") 174 | 175 | elapsed := time.Since(start).String() 176 | log.Infof("collection finished in %s", elapsed) 177 | return nil 178 | } 179 | 180 | func setupMetrics( 181 | uri, table string, 182 | orgs []string, 183 | metricSync int64, 184 | ) (gitcollector.MetricsCollector, error) { 185 | db, err := metrics.PrepareDB(uri, table, orgs) 186 | if err != nil { 187 | log.Errorf(err, "metrics database") 188 | return nil, err 189 | } 190 | 191 | mcs := make(map[string]*metrics.Collector, len(orgs)) 192 | for _, org := range orgs { 193 | mc := metrics.NewCollector(&metrics.CollectorOpts{ 194 | Log: log.New(log.Fields{"org": org}), 195 | Send: metrics.SendToDB(db, table, org), 196 | SyncTime: time.Duration(metricSync) * time.Second, 197 
| }) 198 | 199 | mcs[org] = mc 200 | } 201 | 202 | return metrics.NewCollectorByOrg(mcs), nil 203 | } 204 | 205 | func runGHOrgProviders( 206 | logger log.Logger, 207 | orgs []string, 208 | excludedRepos []string, 209 | token string, 210 | download chan gitcollector.Job, 211 | skipForks bool, 212 | ) { 213 | var wg sync.WaitGroup 214 | wg.Add(len(orgs)) 215 | for _, o := range orgs { 216 | org := o 217 | p := provider.NewGitHubOrg( 218 | org, 219 | excludedRepos, 220 | token, 221 | download, 222 | &discovery.GitHubOpts{ 223 | SkipForks: skipForks, 224 | }, 225 | ) 226 | 227 | go func() { 228 | err := p.Start() 229 | if err != nil && 230 | !discovery.ErrNewRepositoriesNotFound.Is(err) { 231 | logger.Warningf(err.Error()) 232 | } 233 | 234 | logger.Debugf("%s organization provider stopped", org) 235 | wg.Done() 236 | }() 237 | 238 | logger.Debugf("%s organization provider started", org) 239 | } 240 | 241 | wg.Wait() 242 | close(download) 243 | } 244 | -------------------------------------------------------------------------------- /testutils/proxy.go: -------------------------------------------------------------------------------- 1 | package testutils 2 | 3 | import ( 4 | "context" 5 | "crypto/tls" 6 | "fmt" 7 | "io" 8 | "math/rand" 9 | "net" 10 | "net/http" 11 | "net/url" 12 | "strconv" 13 | "sync" 14 | "time" 15 | 16 | "github.com/cenkalti/backoff" 17 | "gopkg.in/src-d/go-log.v1" 18 | ) 19 | 20 | const ( 21 | pingInterval = 500 * time.Millisecond 22 | pingTimeout = 15 * time.Second 23 | ) 24 | 25 | func init() { 26 | rand.Seed(time.Now().UTC().UnixNano()) 27 | } 28 | 29 | /* 30 | Current file provides functionality of https and http proxy with ability to mock up responses 31 | for testing purposes. Usually it's needed when client is not accessible from testing environment. 
32 | 33 | In this case several steps need to be done: 34 | 1) save http.DefaultTransport to a variable healthyTransport 35 | 2) use healthyTransport as an argument to the proxy constructor 36 | 3) spin up your proxy server 37 | 4) change http.DefaultTransport to the one that uses your proxy 38 | 5) defer changing http.DefaultTransport to the regular one after test is finished 39 | 40 | Snippet: 41 | healthyTransport := http.DefaultTransport 42 | defer func() { http.DefaultTransport = healthyTransport }() 43 | 44 | proxy, err := testutils.NewProxy(healthyTransport, &testutils.Options{Code: 404}) 45 | require.NoError(t, err) 46 | 47 | proxy.Start() 48 | defer func() { proxy.Stop() }() 49 | 50 | require.NoError(t, proxy.SetTransportProxy()) 51 | ... 52 | */ 53 | 54 | // TODO(@lwsanty): implement configs for different endpoints if needed 55 | 56 | // Proxy is a struct of proxy wrapper 57 | type Proxy struct { 58 | mu sync.Mutex 59 | server *http.Server 60 | transport http.RoundTripper 61 | port string 62 | // options for mock ups 63 | options *Options 64 | // FailEachNthRequestCounter is a counter required for failing condition on each nth request 65 | FailEachNthRequestCounter int 66 | // FailThresholdCounter is a counter required for failing condition after threshold has been reached 67 | FailThresholdCounter int 68 | } 69 | 70 | // Options represents the amount of options for mock ups 71 | type Options struct { 72 | // Error is a trigger to fail with http.Error 73 | Error bool 74 | // ErrorText is an argument for http.Error in the case of Error == true 75 | ErrorText string 76 | // Code is a status code returned in response 77 | Code int 78 | // Delay represents the amount of time that handler will wait before response 79 | Delay time.Duration 80 | // FailEachNthRequest defines the period of requests failure 81 | FailEachNthRequest int 82 | // FailEachNthCode defines the status code to be returned during periodical requests failure 83 | FailEachNthCode int 84 | // 
FailThreshold defines the threshold of successfully processed requests, after which mocked responses will be returned 85 | FailThreshold int 86 | // FailThresholdCode defines the status code to be returned after threshold overcome 87 | FailThresholdCode int 88 | // pemPath and keyPath are paths to certs that required by https server 89 | PemPath string 90 | KeyPath string 91 | } 92 | 93 | // NewProxy is a proxy constructor 94 | func NewProxy(transport http.RoundTripper, options *Options) (*Proxy, error) { 95 | processOptions(options) 96 | port := strconv.Itoa(rand.Intn(10000) + 10000) 97 | 98 | proxy := &Proxy{ 99 | transport: transport, 100 | options: options, 101 | port: port, 102 | server: &http.Server{ 103 | Addr: ":" + port, 104 | // Disable HTTP/2. 105 | TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)), 106 | }, 107 | } 108 | proxy.initHandler() 109 | 110 | return proxy, nil 111 | } 112 | 113 | func (p *Proxy) initHandler() { 114 | p.server.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 115 | if r.Method == http.MethodConnect { 116 | p.handleTunneling(w, r) 117 | } else { 118 | p.handleHTTP(w, r) 119 | } 120 | }) 121 | } 122 | 123 | // Start starts proxy server 124 | func (p *Proxy) Start() error { 125 | go func() { 126 | if err := p.server.ListenAndServeTLS(p.options.PemPath, p.options.KeyPath); err != nil { 127 | if err != http.ErrServerClosed { 128 | log.Errorf(err, "https server failed") 129 | } 130 | } 131 | }() 132 | 133 | bo := backoff.NewExponentialBackOff() 134 | bo.MaxInterval = pingInterval 135 | bo.MaxElapsedTime = pingTimeout 136 | return backoff.Retry(func() error { 137 | conn, err := net.DialTimeout("tcp", p.server.Addr, time.Second/2) 138 | if err == nil { 139 | conn.Close() 140 | return nil 141 | } 142 | return fmt.Errorf("start server: timeout") 143 | }, bo) 144 | } 145 | 146 | // Stop stops proxy server 147 | func (p *Proxy) Stop() { 148 | if err := 
p.server.Shutdown(context.Background()); err != nil { 149 | log.Errorf(err, "https server shutdown failed") 150 | } 151 | } 152 | 153 | // SetTransportProxy changes http.DefaultTransport to the one that uses current server as a proxy 154 | func (p *Proxy) SetTransportProxy() error { 155 | u, err := url.Parse("https://localhost:" + p.port) 156 | if err != nil { 157 | return err 158 | } 159 | 160 | http.DefaultTransport = &http.Transport{ 161 | Proxy: http.ProxyURL(u), 162 | // Disable HTTP/2. 163 | TLSHandshakeTimeout: 2 * time.Second, 164 | TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper), 165 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 166 | } 167 | 168 | return nil 169 | } 170 | 171 | func (p *Proxy) handleTunneling(w http.ResponseWriter, r *http.Request) { 172 | time.Sleep(p.options.Delay) 173 | 174 | if p.options.Error { 175 | w.WriteHeader(p.options.Code) 176 | http.Error(w, p.options.ErrorText, p.options.Code) 177 | return 178 | } 179 | 180 | if p.options.FailEachNthRequest > 0 { 181 | p.mu.Lock() 182 | p.FailEachNthRequestCounter++ 183 | c := p.FailEachNthRequestCounter 184 | p.mu.Unlock() 185 | 186 | if c%p.options.FailEachNthRequest == 0 { 187 | http.Error(w, "periodical fail", p.options.FailEachNthCode) 188 | return 189 | } 190 | } 191 | 192 | if p.options.FailThreshold > 0 { 193 | p.mu.Lock() 194 | p.FailThresholdCounter++ 195 | c := p.FailThresholdCounter 196 | p.mu.Unlock() 197 | 198 | if c > p.options.FailThreshold { 199 | http.Error(w, "periodical fail", p.options.FailThresholdCode) 200 | return 201 | } 202 | } 203 | 204 | destConn, err := net.DialTimeout("tcp", r.Host, 10*time.Second) 205 | if err != nil { 206 | http.Error(w, err.Error(), http.StatusServiceUnavailable) 207 | return 208 | } 209 | 210 | w.WriteHeader(p.options.Code) 211 | 212 | hijacker, ok := w.(http.Hijacker) 213 | if !ok { 214 | http.Error(w, "Hijacking not supported", http.StatusInternalServerError) 215 | return 216 | } 217 | 
clientConn, _, err := hijacker.Hijack() 218 | if err != nil { 219 | http.Error(w, err.Error(), http.StatusServiceUnavailable) 220 | } 221 | go transfer(destConn, clientConn) 222 | go transfer(clientConn, destConn) 223 | } 224 | 225 | func transfer(destination io.WriteCloser, source io.ReadCloser) { 226 | defer destination.Close() 227 | defer source.Close() 228 | io.Copy(destination, source) 229 | } 230 | 231 | func (p *Proxy) handleHTTP(w http.ResponseWriter, req *http.Request) { 232 | resp, err := p.transport.RoundTrip(req) 233 | if err != nil { 234 | http.Error(w, err.Error(), http.StatusServiceUnavailable) 235 | return 236 | } 237 | defer resp.Body.Close() 238 | copyHeader(w.Header(), resp.Header) 239 | w.WriteHeader(resp.StatusCode) 240 | io.Copy(w, resp.Body) 241 | } 242 | 243 | func copyHeader(dst, src http.Header) { 244 | for k, vv := range src { 245 | for _, v := range vv { 246 | dst.Add(k, v) 247 | } 248 | } 249 | } 250 | 251 | func processOptions(o *Options) { 252 | ok := http.StatusOK 253 | if o.Code == 0 { 254 | o.Code = ok 255 | } 256 | if o.FailEachNthCode == 0 { 257 | o.FailEachNthCode = ok 258 | } 259 | if o.FailThresholdCode == 0 { 260 | o.FailThresholdCode = ok 261 | } 262 | } 263 | -------------------------------------------------------------------------------- /metrics/metrics.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | "github.com/src-d/gitcollector" 10 | "github.com/src-d/gitcollector/library" 11 | "gopkg.in/src-d/go-log.v1" 12 | ) 13 | 14 | // SendFn is the function a Collector will use to export metrics. 15 | type SendFn func(context.Context, *Collector, *library.Job) error 16 | 17 | // CollectorOpts represenst configuration options for a Collector. 
18 | type CollectorOpts struct { 19 | BatchSize int 20 | SyncTime time.Duration 21 | Log log.Logger 22 | Send SendFn 23 | } 24 | 25 | // Collector is an implementation of gitcollector.MetricsCollector 26 | type Collector struct { 27 | logger log.Logger 28 | opts *CollectorOpts 29 | 30 | success chan gitcollector.Job 31 | successDownloadCount uint64 32 | successUpdateCount uint64 33 | 34 | fail chan gitcollector.Job 35 | failCount uint64 36 | 37 | discover chan gitcollector.Job 38 | discoverCount uint64 39 | 40 | wg sync.WaitGroup 41 | cancel chan bool 42 | } 43 | 44 | var _ gitcollector.MetricsCollector = (*Collector)(nil) 45 | 46 | const ( 47 | batchSize = 10 48 | syncTime = 30 * time.Second 49 | waitTimeout = 5 * time.Second 50 | ) 51 | 52 | // NewCollector builds a new Collector. 53 | func NewCollector(opts *CollectorOpts) *Collector { 54 | if opts.BatchSize <= 0 { 55 | opts.BatchSize = batchSize 56 | } 57 | 58 | if opts.SyncTime <= 0 { 59 | opts.SyncTime = syncTime 60 | } 61 | 62 | if opts.Log == nil { 63 | opts.Log = log.New(nil) 64 | } 65 | 66 | if opts.Send == nil { 67 | opts.Send = func( 68 | _ context.Context, 69 | _ *Collector, 70 | _ *library.Job, 71 | ) error { 72 | return nil 73 | } 74 | } 75 | 76 | opts.Log = opts.Log.New(log.Fields{"metrics": "library"}) 77 | capacity := 5 * opts.BatchSize 78 | return &Collector{ 79 | logger: opts.Log, 80 | opts: opts, 81 | success: make(chan gitcollector.Job, capacity), 82 | fail: make(chan gitcollector.Job, capacity), 83 | discover: make(chan gitcollector.Job, capacity), 84 | cancel: make(chan bool), 85 | } 86 | } 87 | 88 | const ( 89 | successKind = iota 90 | failKind 91 | discoverKind 92 | ) 93 | 94 | // Start implements the gitcollector.MetricsCollector interface. 
95 | func (c *Collector) Start() { 96 | c.wg.Add(1) 97 | defer c.wg.Done() 98 | 99 | var ( 100 | stop bool 101 | batch int 102 | lastSent = time.Now() 103 | job *library.Job 104 | waiting bool 105 | ) 106 | 107 | ctx := context.Background() 108 | for !(c.isClosed() || stop) { 109 | var ( 110 | j gitcollector.Job 111 | kind int 112 | ) 113 | 114 | select { 115 | case job, ok := <-c.success: 116 | if !ok { 117 | c.success = nil 118 | continue 119 | } 120 | 121 | j, kind = job, successKind 122 | case job, ok := <-c.fail: 123 | if !ok { 124 | c.fail = nil 125 | continue 126 | } 127 | 128 | j, kind = job, failKind 129 | case job, ok := <-c.discover: 130 | if !ok { 131 | c.discover = nil 132 | continue 133 | } 134 | 135 | j, kind = job, discoverKind 136 | case stop = <-c.cancel: 137 | c.close() 138 | continue 139 | case <-time.After(waitTimeout): 140 | if !waiting { 141 | c.logger.Debugf("waiting new metrics") 142 | waiting = true 143 | } 144 | } 145 | 146 | if j != nil { 147 | var ok bool 148 | job, ok = j.(*library.Job) 149 | if !ok { 150 | c.logger.Warningf("wrong job found: %T", j) 151 | continue 152 | } 153 | 154 | if err := c.modifyMetrics(job, kind); err != nil { 155 | log.Warningf(err.Error()) 156 | continue 157 | } 158 | 159 | batch++ 160 | waiting = false 161 | } 162 | 163 | if c.sendMetric(batch, lastSent) { 164 | if err := c.opts.Send(ctx, c, job); err != nil { 165 | c.logger.Warningf( 166 | "couldn't send metrics: %s", 167 | err.Error(), 168 | ) 169 | 170 | continue 171 | } 172 | 173 | c.logMetrics(true) 174 | lastSent = time.Now() 175 | batch = 0 176 | waiting = false 177 | } 178 | } 179 | 180 | if batch > 0 && !stop { 181 | if err := c.opts.Send(ctx, c, job); err != nil { 182 | c.logger.Warningf( 183 | "couldn't send metrics: %s", 184 | err.Error(), 185 | ) 186 | } 187 | } 188 | 189 | c.logMetrics(false) 190 | } 191 | 192 | func (c *Collector) logMetrics(debug bool) { 193 | logger := c.logger.New(log.Fields{ 194 | "discover": c.discoverCount, 195 | 
"download": c.successDownloadCount, 196 | "update": c.successUpdateCount, 197 | "fail": c.failCount, 198 | }) 199 | 200 | msg := "metrics updated" 201 | if debug { 202 | logger.Debugf(msg) 203 | } else { 204 | logger.Infof(msg) 205 | } 206 | } 207 | 208 | func (c *Collector) isClosed() bool { 209 | return c.success == nil && c.fail == nil && c.discover == nil 210 | } 211 | 212 | func (c *Collector) close() { 213 | close(c.success) 214 | close(c.fail) 215 | close(c.discover) 216 | close(c.cancel) 217 | c.cancel = nil 218 | } 219 | 220 | func (c *Collector) modifyMetrics(job *library.Job, kind int) error { 221 | switch kind { 222 | case successKind: 223 | if job.Type == library.JobDownload { 224 | c.successDownloadCount++ 225 | break 226 | } 227 | 228 | for range job.Endpoints() { 229 | c.successUpdateCount++ 230 | } 231 | case failKind: 232 | for range job.Endpoints() { 233 | c.failCount++ 234 | } 235 | case discoverKind: 236 | if job.Type == library.JobDownload { 237 | c.discoverCount++ 238 | } 239 | default: 240 | return fmt.Errorf("wrong metric type found: %d", kind) 241 | } 242 | 243 | return nil 244 | } 245 | 246 | func (c *Collector) sendMetric(batch int, lastSent time.Time) bool { 247 | fullBatch := batch >= c.opts.BatchSize 248 | syncTimeout := time.Since(lastSent) >= c.opts.SyncTime 249 | if syncTimeout { 250 | msg := "sync timeout" 251 | if batch == 0 { 252 | msg += ": nothing to update" 253 | } 254 | 255 | c.logger.Debugf(msg) 256 | } 257 | 258 | return fullBatch || (syncTimeout && batch > 0) 259 | } 260 | 261 | // Stop implements the gitcollector.MetricsCollector interface. 262 | func (c *Collector) Stop(immediate bool) { 263 | if c.cancel == nil { 264 | return 265 | } 266 | 267 | c.cancel <- immediate 268 | c.wg.Wait() 269 | } 270 | 271 | // Success implements the gitcollector.MetricsCollector interface. 
272 | func (c *Collector) Success(job gitcollector.Job) { 273 | c.success <- job 274 | } 275 | 276 | // Fail implements the gitcollector.MetricsCollector interface. 277 | func (c *Collector) Fail(job gitcollector.Job) { 278 | c.fail <- job 279 | } 280 | 281 | // Discover implements the gitcollector.MetricsCollector interface. 282 | func (c *Collector) Discover(job gitcollector.Job) { 283 | c.discover <- job 284 | } 285 | 286 | // CollectorByOrg plays as a reverse proxy Collector for several organizations. 287 | type CollectorByOrg struct { 288 | orgMetrics map[string]*Collector 289 | } 290 | 291 | // NewCollectorByOrg builds a new CollectorByOrg. 292 | func NewCollectorByOrg(orgsMetrics map[string]*Collector) *CollectorByOrg { 293 | return &CollectorByOrg{ 294 | orgMetrics: orgsMetrics, 295 | } 296 | } 297 | 298 | // Start implements the gitcollector.MetricsCollector interface. 299 | func (c *CollectorByOrg) Start() { 300 | for _, m := range c.orgMetrics { 301 | go m.Start() 302 | } 303 | } 304 | 305 | // Stop implements the gitcollector.MetricsCollector interface. 306 | func (c *CollectorByOrg) Stop(immediate bool) { 307 | for _, m := range c.orgMetrics { 308 | m.Stop(immediate) 309 | } 310 | } 311 | 312 | // Success implements the gitcollector.MetricsCollector interface. 313 | func (c *CollectorByOrg) Success(job gitcollector.Job) { 314 | orgs := triageJob(job) 315 | for org, job := range orgs { 316 | m, ok := c.orgMetrics[org] 317 | if !ok { 318 | continue 319 | } 320 | 321 | m.Success(job) 322 | } 323 | } 324 | 325 | // Fail implements the gitcollector.MetricsCollector interface. 326 | func (c *CollectorByOrg) Fail(job gitcollector.Job) { 327 | orgs := triageJob(job) 328 | for org, job := range orgs { 329 | m, ok := c.orgMetrics[org] 330 | if !ok { 331 | continue 332 | } 333 | 334 | m.Fail(job) 335 | } 336 | } 337 | 338 | // Discover implements the gitcollector.MetricsCollector interface. 
339 | func (c *CollectorByOrg) Discover(job gitcollector.Job) { 340 | orgs := triageJob(job) 341 | for org, job := range orgs { 342 | m, ok := c.orgMetrics[org] 343 | if !ok { 344 | continue 345 | } 346 | 347 | m.Discover(job) 348 | } 349 | } 350 | 351 | func triageJob(job gitcollector.Job) map[string]*library.Job { 352 | organizations := map[string]*library.Job{} 353 | lj, _ := job.(*library.Job) 354 | for _, ep := range lj.Endpoints() { 355 | org := library.GetOrgFromEndpoint(ep) 356 | j, ok := organizations[org] 357 | if !ok { 358 | j = &(*lj) 359 | j.SetEndpoints([]string{}) 360 | organizations[org] = j 361 | } 362 | 363 | j.SetEndpoints(append(j.Endpoints(), ep)) 364 | } 365 | 366 | return organizations 367 | } 368 | -------------------------------------------------------------------------------- /downloader/git.go: -------------------------------------------------------------------------------- 1 | package downloader 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | 10 | "github.com/src-d/go-borges" 11 | "github.com/src-d/go-borges/siva" 12 | "gopkg.in/src-d/go-billy.v4" 13 | "gopkg.in/src-d/go-billy.v4/util" 14 | "gopkg.in/src-d/go-errors.v1" 15 | "gopkg.in/src-d/go-git.v4" 16 | "gopkg.in/src-d/go-git.v4/config" 17 | "gopkg.in/src-d/go-git.v4/plumbing" 18 | "gopkg.in/src-d/go-git.v4/plumbing/cache" 19 | "gopkg.in/src-d/go-git.v4/plumbing/object" 20 | "gopkg.in/src-d/go-git.v4/plumbing/transport/http" 21 | "gopkg.in/src-d/go-git.v4/storage/filesystem" 22 | ) 23 | 24 | var ( 25 | // ErrObjectTypeNotSupported returned by ResolveCommit when the 26 | // referenced object isn't a Commit nor a Tag. 
27 | ErrObjectTypeNotSupported = errors.NewKind( 28 | "object type %q not supported") 29 | ) 30 | 31 | const ( 32 | cloneRootPath = "local_repos" 33 | fetchHEADStr = "+HEAD:refs/remotes/%s/HEAD" 34 | fetchRefSpecStr = "+refs/*:refs/remotes/%s/*" 35 | ) 36 | 37 | // CloneRepository clones a git repository from the given endpoint into the 38 | // billy.Filesystem. A remote with the id is created for that. 39 | func CloneRepository( 40 | ctx context.Context, 41 | fs billy.Filesystem, 42 | path, endpoint, id, token string, 43 | ) (*git.Repository, error) { 44 | repoFS, err := fs.Chroot(path) 45 | if err != nil { 46 | return nil, err 47 | } 48 | 49 | sto := filesystem.NewStorage(repoFS, cache.NewObjectLRUDefault()) 50 | repo, err := git.Init(sto, nil) 51 | if err != nil { 52 | util.RemoveAll(fs, path) 53 | return nil, err 54 | } 55 | 56 | remote, err := createRemote(repo, id, endpoint) 57 | if err != nil { 58 | util.RemoveAll(fs, path) 59 | return nil, err 60 | } 61 | 62 | opts := &git.FetchOptions{ 63 | RefSpecs: []config.RefSpec{ 64 | config.RefSpec(fmt.Sprintf(fetchHEADStr, id)), 65 | }, 66 | Force: true, 67 | Tags: git.NoTags, 68 | } 69 | 70 | if token != "" { 71 | opts.Auth = &http.BasicAuth{ 72 | Username: "gitcollector", 73 | Password: token, 74 | } 75 | } 76 | 77 | if err = remote.FetchContext(ctx, opts); err != nil { 78 | util.RemoveAll(fs, path) 79 | return nil, err 80 | } 81 | 82 | return repo, nil 83 | } 84 | 85 | func createRemote(r *git.Repository, id, endpoint string) (*git.Remote, error) { 86 | rc := &config.RemoteConfig{ 87 | Name: id, 88 | URLs: []string{endpoint}, 89 | Fetch: []config.RefSpec{ 90 | config.RefSpec(fmt.Sprintf(fetchHEADStr, id)), 91 | config.RefSpec(fmt.Sprintf(fetchRefSpecStr, id)), 92 | }} 93 | 94 | remote, err := r.Remote(id) 95 | if err != nil { 96 | return r.CreateRemote(rc) 97 | } 98 | 99 | if remote.Config() == rc { 100 | return remote, nil 101 | } 102 | 103 | cfg, err := r.Config() 104 | if err != nil { 105 | return nil, err 106 
| } 107 | 108 | cfg.Remotes[id] = rc 109 | if err := r.Storer.SetConfig(cfg); err != nil { 110 | return nil, err 111 | } 112 | 113 | return r.Remote(id) 114 | } 115 | 116 | // RootCommit traverse the commit history for the given remote following the 117 | // first parent of each commit. The root commit found (commit with no parents) 118 | // is returned. 119 | func RootCommit( 120 | repo *git.Repository, 121 | remote string, 122 | ) (*object.Commit, error) { 123 | start, err := headCommit(repo, remote) 124 | if err != nil { 125 | return nil, err 126 | } 127 | 128 | current := start 129 | for len(current.ParentHashes) > 0 { 130 | current, err = current.Parent(0) 131 | if err != nil { 132 | return nil, err 133 | } 134 | } 135 | 136 | return current, nil 137 | } 138 | 139 | func headCommit(repo *git.Repository, id string) (*object.Commit, error) { 140 | ref, err := repo.Reference( 141 | plumbing.NewRemoteHEADReferenceName(id), 142 | true, 143 | ) 144 | 145 | if err != nil { 146 | return nil, err 147 | } 148 | 149 | return resolveCommit(repo, ref.Hash()) 150 | } 151 | 152 | func resolveCommit( 153 | repo *git.Repository, 154 | hash plumbing.Hash, 155 | ) (*object.Commit, error) { 156 | obj, err := repo.Object(plumbing.AnyObject, hash) 157 | if err != nil { 158 | return nil, err 159 | } 160 | 161 | switch o := obj.(type) { 162 | case *object.Commit: 163 | return o, nil 164 | case *object.Tag: 165 | return resolveCommit(repo, o.Target) 166 | default: 167 | return nil, ErrObjectTypeNotSupported.New(o.Type()) 168 | } 169 | } 170 | 171 | // PrepareRepository returns a borges.Repository ready to fetch changes. 172 | // It creates a rooted repository copying the cloned repository in tmp to 173 | // the siva file the library uses at the location with the given location ID, 174 | // creating this location if not exists. 
175 | func PrepareRepository( 176 | ctx context.Context, 177 | lib *siva.Library, 178 | locID borges.LocationID, 179 | repoID borges.RepositoryID, 180 | endpoint string, 181 | tmp billy.Filesystem, 182 | clonePath string, 183 | ) (borges.Repository, error) { 184 | var r borges.Repository 185 | 186 | loc, err := lib.AddLocation(locID) 187 | if err != nil { 188 | if !siva.ErrLocationExists.Is(err) { 189 | return nil, err 190 | } 191 | 192 | loc, err = lib.Location(locID) 193 | if err != nil { 194 | return nil, err 195 | } 196 | 197 | r, err = loc.Get(repoID, borges.RWMode) 198 | if err != nil { 199 | r, err = loc.Init(repoID) 200 | if err != nil { 201 | return nil, err 202 | } 203 | } 204 | } 205 | 206 | if r == nil { 207 | r, err = createRootedRepo(ctx, loc, repoID, tmp, clonePath) 208 | if err != nil { 209 | return nil, err 210 | } 211 | } 212 | 213 | if _, err := createRemote(r.R(), repoID.String(), endpoint); err != nil { 214 | if cErr := r.Close(); cErr != nil { 215 | err = fmt.Errorf("%s: %s", err.Error(), cErr.Error()) 216 | } 217 | 218 | return nil, err 219 | } 220 | 221 | return r, nil 222 | } 223 | 224 | // FetchChanges fetches changes for the given remote into the borges.Repository. 
225 | func FetchChanges( 226 | ctx context.Context, 227 | r borges.Repository, 228 | remote string, 229 | token string, 230 | ) error { 231 | opts := &git.FetchOptions{ 232 | RemoteName: remote, 233 | } 234 | 235 | if token != "" { 236 | opts.Auth = &http.BasicAuth{ 237 | Username: "gitcollector", 238 | Password: token, 239 | } 240 | } 241 | 242 | if err := r.R().FetchContext( 243 | ctx, opts, 244 | ); err != nil && err != git.NoErrAlreadyUpToDate { 245 | if cErr := r.Close(); cErr != nil { 246 | err = fmt.Errorf("%s: %s", err.Error(), cErr.Error()) 247 | } 248 | 249 | return err 250 | } 251 | 252 | return nil 253 | } 254 | 255 | func createRootedRepo( 256 | ctx context.Context, 257 | loc borges.Location, 258 | repoID borges.RepositoryID, 259 | clonedFS billy.Filesystem, 260 | clonedPath string, 261 | ) (borges.Repository, error) { 262 | repo, err := loc.Init(repoID) 263 | if err != nil { 264 | return nil, err 265 | } 266 | 267 | err = recursiveCopy(ctx, "/", repo.FS(), clonedPath, clonedFS) 268 | if err != nil { 269 | repo = nil 270 | } 271 | 272 | return repo, err 273 | } 274 | 275 | func recursiveCopy( 276 | ctx context.Context, 277 | dst string, 278 | dstFS billy.Filesystem, 279 | src string, 280 | srcFS billy.Filesystem, 281 | ) error { 282 | select { 283 | case <-ctx.Done(): 284 | return ctx.Err() 285 | default: 286 | } 287 | 288 | stat, err := srcFS.Stat(src) 289 | if err != nil { 290 | return err 291 | } 292 | 293 | if stat.IsDir() { 294 | err = dstFS.MkdirAll(dst, stat.Mode()) 295 | if err != nil { 296 | return err 297 | } 298 | 299 | files, err := srcFS.ReadDir(src) 300 | if err != nil { 301 | return err 302 | } 303 | 304 | for _, file := range files { 305 | srcPath := filepath.Join(src, file.Name()) 306 | dstPath := filepath.Join(dst, file.Name()) 307 | 308 | err = recursiveCopy(ctx, dstPath, dstFS, srcPath, srcFS) 309 | if err != nil { 310 | return err 311 | } 312 | } 313 | } else { 314 | err = copyFile(ctx, dst, dstFS, src, srcFS, stat.Mode()) 315 | if 
err != nil { 316 | return err 317 | } 318 | } 319 | 320 | return nil 321 | } 322 | 323 | func copyFile( 324 | ctx context.Context, 325 | dst string, 326 | dstFS billy.Filesystem, 327 | src string, 328 | srcFS billy.Filesystem, 329 | mode os.FileMode, 330 | ) error { 331 | select { 332 | case <-ctx.Done(): 333 | return ctx.Err() 334 | default: 335 | } 336 | 337 | _, err := srcFS.Stat(src) 338 | if err != nil { 339 | return err 340 | } 341 | 342 | fo, err := srcFS.Open(src) 343 | if err != nil { 344 | return err 345 | } 346 | defer fo.Close() 347 | 348 | fd, err := dstFS.OpenFile(dst, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, mode) 349 | if err != nil { 350 | return err 351 | } 352 | defer fd.Close() 353 | 354 | _, err = io.Copy(fd, newContextReader(ctx, fo)) 355 | if err != nil { 356 | fd.Close() 357 | dstFS.Remove(dst) 358 | return err 359 | } 360 | 361 | return nil 362 | } 363 | 364 | type contextReader struct { 365 | reader io.Reader 366 | ctx context.Context 367 | } 368 | 369 | func newContextReader(ctx context.Context, reader io.Reader) *contextReader { 370 | return &contextReader{ 371 | ctx: ctx, 372 | reader: reader, 373 | } 374 | } 375 | 376 | func (c *contextReader) Read(p []byte) (n int, err error) { 377 | select { 378 | case <-c.ctx.Done(): 379 | return 0, c.ctx.Err() 380 | default: 381 | } 382 | 383 | return c.reader.Read(p) 384 | } 385 | -------------------------------------------------------------------------------- /discovery/github_test.go: -------------------------------------------------------------------------------- 1 | package discovery 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net/http" 7 | "os" 8 | "strings" 9 | "testing" 10 | "time" 11 | 12 | "github.com/src-d/gitcollector/testutils" 13 | 14 | "github.com/google/go-github/v28/github" 15 | "github.com/stretchr/testify/require" 16 | ) 17 | 18 | const couldNotFindNewRepos = "couldn't find new repositories" 19 | 20 | func TestGitHub(t *testing.T) { 21 | var req = require.New(t) 22 | 23 | const ( 24 
| org = "src-d" 25 | timeToStop = 5 * time.Second 26 | ) 27 | 28 | token, err := getToken() 29 | if err != nil { 30 | t.Skip(err.Error()) 31 | } 32 | 33 | queue := make(chan *github.Repository, 50) 34 | advertiseRepos := func( 35 | _ context.Context, 36 | repos []*github.Repository, 37 | ) error { 38 | for _, repo := range repos { 39 | queue <- repo 40 | } 41 | 42 | return nil 43 | } 44 | 45 | discovery := NewGitHub( 46 | advertiseRepos, 47 | NewGHOrgReposIter(org, []string{}, &GHReposIterOpts{ 48 | TimeNewRepos: 1 * time.Second, 49 | ResultsPerPage: 100, 50 | AuthToken: token, 51 | }), 52 | &GitHubOpts{ 53 | MaxJobBuffer: 50, 54 | }, 55 | ) 56 | 57 | var ( 58 | consumedRepos = make(chan *github.Repository, 200) 59 | stop bool 60 | done = make(chan struct{}) 61 | ) 62 | 63 | go func() { 64 | defer func() { done <- struct{}{} }() 65 | for !stop { 66 | select { 67 | case repo, ok := <-queue: 68 | if !ok { 69 | return 70 | } 71 | 72 | select { 73 | case consumedRepos <- repo: 74 | case <-time.After(timeToStop): 75 | stop = true 76 | } 77 | } 78 | } 79 | }() 80 | 81 | err = discovery.Start() 82 | req.True(ErrDiscoveryStopped.Is(err)) 83 | 84 | close(queue) 85 | <-done 86 | req.False(stop) 87 | close(consumedRepos) 88 | 89 | for repo := range consumedRepos { 90 | ep, err := GetGHEndpoint(repo) 91 | req.NoError(err) 92 | req.True(strings.Contains(ep, org)) 93 | } 94 | } 95 | 96 | func TestGitHubSkipForks(t *testing.T) { 97 | var req = require.New(t) 98 | const org = "src-d" 99 | 100 | token, skip := getToken() 101 | if skip != nil { 102 | t.Skip(skip.Error()) 103 | } 104 | 105 | queue := make(chan *github.Repository, 200) 106 | advertiseRepos := func( 107 | _ context.Context, 108 | repos []*github.Repository, 109 | ) error { 110 | for _, repo := range repos { 111 | queue <- repo 112 | } 113 | 114 | return nil 115 | } 116 | 117 | discovery := NewGitHub( 118 | advertiseRepos, 119 | NewGHOrgReposIter(org, []string{}, &GHReposIterOpts{ 120 | AuthToken: token, 121 | }), 122 | 
&GitHubOpts{ 123 | SkipForks: true, 124 | MaxJobBuffer: 50, 125 | }, 126 | ) 127 | 128 | done := make(chan struct{}) 129 | var err error 130 | go func() { 131 | err = discovery.Start() 132 | close(done) 133 | }() 134 | 135 | <-done 136 | req.True(ErrNewRepositoriesNotFound.Is(err), err.Error()) 137 | close(queue) 138 | forkedRepos := []string{"or-tools", "PyHive", "go-oniguruma"} 139 | for repo := range queue { 140 | ep, err := GetGHEndpoint(repo) 141 | req.NoError(err) 142 | req.True(strings.Contains(ep, org)) 143 | 144 | for _, forked := range forkedRepos { 145 | req.False(strings.Contains(ep, forked)) 146 | } 147 | } 148 | } 149 | 150 | func TestExcludeRepos(t *testing.T) { 151 | var req = require.New(t) 152 | 153 | const ( 154 | org = "src-d" 155 | timeToStop = 5 * time.Second 156 | ) 157 | 158 | token, err := getToken() 159 | if err != nil { 160 | t.Skip(err.Error()) 161 | } 162 | 163 | queue := make(chan *github.Repository, 50) 164 | advertiseRepos := func( 165 | _ context.Context, 166 | repos []*github.Repository, 167 | ) error { 168 | for _, repo := range repos { 169 | queue <- repo 170 | } 171 | 172 | return nil 173 | } 174 | 175 | discovery := NewGitHub( 176 | advertiseRepos, 177 | NewGHOrgReposIter(org, []string{"gitcollector"}, &GHReposIterOpts{ 178 | TimeNewRepos: 1 * time.Second, 179 | ResultsPerPage: 100, 180 | AuthToken: token, 181 | }), 182 | &GitHubOpts{ 183 | MaxJobBuffer: 50, 184 | }, 185 | ) 186 | 187 | var ( 188 | consumedRepos = make(chan *github.Repository, 200) 189 | stop bool 190 | done = make(chan struct{}) 191 | ) 192 | 193 | go func() { 194 | defer func() { done <- struct{}{} }() 195 | for !stop { 196 | select { 197 | case repo, ok := <-queue: 198 | if !ok { 199 | return 200 | } 201 | 202 | select { 203 | case consumedRepos <- repo: 204 | case <-time.After(timeToStop): 205 | stop = true 206 | } 207 | } 208 | } 209 | }() 210 | 211 | err = discovery.Start() 212 | req.True(ErrDiscoveryStopped.Is(err)) 213 | 214 | close(queue) 215 | <-done 
216 | req.False(stop) 217 | close(consumedRepos) 218 | 219 | for repo := range consumedRepos { 220 | ep, err := GetGHEndpoint(repo) 221 | req.NoError(err) 222 | req.True(strings.Contains(ep, org)) 223 | req.NotEqual("gitcollector", *repo.Name) 224 | } 225 | } 226 | 227 | // TODO request rate error ? 228 | 229 | // TestProxyMockUps 230 | // setup https proxy that returns error responses with different status codes 231 | // check that errors returned correspond to expected ones 232 | func TestProxyMockUps(t *testing.T) { 233 | for _, tst := range []struct { 234 | name string 235 | code int 236 | errContains string 237 | }{ 238 | {"301", http.StatusMovedPermanently, "Moved Permanently"}, 239 | {"400", http.StatusBadRequest, "Bad Request"}, 240 | {"403", http.StatusForbidden, "Forbidden"}, 241 | {"404", http.StatusNotFound, "Not Found"}, 242 | {"500", http.StatusInternalServerError, "Internal Server Error"}, 243 | {"501", http.StatusNotImplemented, "Not Implemented"}, 244 | {"502", http.StatusBadGateway, "Bad Gateway"}, 245 | {"503", http.StatusServiceUnavailable, "Service Unavailable"}, 246 | } { 247 | tst := tst 248 | t.Run(tst.name, func(t *testing.T) { 249 | testProxyMockUp(t, tst.code, tst.errContains) 250 | }) 251 | } 252 | } 253 | 254 | func testProxyMockUp(t *testing.T, code int, errContains string) { 255 | const org = "bblfsh" 256 | 257 | token, err := getToken() 258 | if err != nil { 259 | t.Skip(err.Error()) 260 | } 261 | 262 | healthyTransport := http.DefaultTransport 263 | defer func() { http.DefaultTransport = healthyTransport }() 264 | 265 | proxy, err := testutils.NewProxy( 266 | healthyTransport, 267 | &testutils.Options{ 268 | Code: code, 269 | KeyPath: "../_testdata/server.key", 270 | PemPath: "../_testdata/server.pem", 271 | }) 272 | require.NoError(t, err) 273 | 274 | require.NoError(t, proxy.Start()) 275 | defer func() { 276 | proxy.Stop() 277 | }() 278 | 279 | require.NoError(t, proxy.SetTransportProxy()) 280 | 281 | queue := make(chan 
*github.Repository, 50) 282 | advertiseRepos := func( 283 | _ context.Context, 284 | repos []*github.Repository, 285 | ) error { 286 | time.Sleep(time.Minute) 287 | for _, repo := range repos { 288 | queue <- repo 289 | } 290 | 291 | return nil 292 | } 293 | 294 | discovery := NewGitHub( 295 | advertiseRepos, 296 | NewGHOrgReposIter(org, []string{}, &GHReposIterOpts{ 297 | TimeNewRepos: 1 * time.Second, 298 | ResultsPerPage: 100, 299 | AuthToken: token, 300 | }), 301 | &GitHubOpts{ 302 | MaxJobBuffer: 50, 303 | AdvertiseTimeout: time.Second, 304 | }, 305 | ) 306 | 307 | err = discovery.Start() 308 | require.Error(t, err) 309 | require.Contains(t, err.Error(), errContains) 310 | } 311 | 312 | type advertiseCase struct { 313 | name string 314 | advErr error 315 | delay, timeout time.Duration 316 | errContains string 317 | } 318 | 319 | // TestAdvertiseErrors checks basic advertise cases: 320 | // 1) advertise function completed successfully 321 | // 2) advertise function has returned an error 322 | // 3) advertise function is being evaluated longer than AdvertiseTimeout 323 | func TestAdvertiseErrors(t *testing.T) { 324 | for _, tst := range []advertiseCase{ 325 | {"NoAdvertiseError", nil, 0, 0, couldNotFindNewRepos}, 326 | {"AdvertiseError", fmt.Errorf("advertise err"), 0, 0, "advertise err"}, 327 | {"AdvertiseTimeout", nil, 2 * time.Second, time.Second, "context deadline exceeded"}, 328 | } { 329 | tst := tst 330 | t.Run(tst.name, func(t *testing.T) { 331 | testAdvertise(t, tst) 332 | }) 333 | } 334 | } 335 | 336 | func testAdvertise(t *testing.T, ac advertiseCase) { 337 | const org = "bblfsh" 338 | 339 | token, err := getToken() 340 | if err != nil { 341 | t.Skip(err.Error()) 342 | } 343 | 344 | queue := make(chan *github.Repository, 50) 345 | advertiseRepos := func( 346 | ctx context.Context, 347 | repos []*github.Repository, 348 | ) error { 349 | time.Sleep(ac.delay) 350 | 351 | select { 352 | case <-ctx.Done(): 353 | return ctx.Err() 354 | default: 355 | } 356 
| 357 | for _, repo := range repos { 358 | queue <- repo 359 | } 360 | 361 | return ac.advErr 362 | } 363 | 364 | discovery := NewGitHub( 365 | advertiseRepos, 366 | NewGHOrgReposIter(org, []string{}, &GHReposIterOpts{ 367 | TimeNewRepos: 1 * time.Second, 368 | ResultsPerPage: 100, 369 | AuthToken: token, 370 | }), 371 | &GitHubOpts{ 372 | MaxJobBuffer: 50, 373 | AdvertiseTimeout: ac.timeout, 374 | }, 375 | ) 376 | 377 | err = discovery.Start() 378 | require.Error(t, err) 379 | require.Contains(t, err.Error(), ac.errContains) 380 | } 381 | 382 | func getToken() (string, error) { 383 | token := os.Getenv("GITHUB_TOKEN") 384 | ci := os.Getenv("TRAVIS") 385 | var err error 386 | if token == "" && ci == "true" { 387 | err = fmt.Errorf("test running on travis CI but " + 388 | "couldn't find GITHUB_TOKEN") 389 | } 390 | 391 | return token, err 392 | } 393 | -------------------------------------------------------------------------------- /downloader/download_test.go: -------------------------------------------------------------------------------- 1 | package downloader 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net" 7 | "net/http" 8 | "os" 9 | "runtime" 10 | "strings" 11 | "sync" 12 | "testing" 13 | 14 | "github.com/src-d/gitcollector/downloader/testhelper" 15 | "github.com/src-d/gitcollector/library" 16 | "github.com/src-d/gitcollector/testutils" 17 | 18 | "github.com/src-d/go-borges" 19 | "github.com/src-d/go-borges/siva" 20 | "github.com/stretchr/testify/require" 21 | "gopkg.in/src-d/go-billy.v4" 22 | "gopkg.in/src-d/go-git.v4/plumbing/transport" 23 | "gopkg.in/src-d/go-log.v1" 24 | ) 25 | 26 | type protocol string 27 | 28 | var ( 29 | httpsProtocol = protocol("https") 30 | gitProtocol = protocol("git") 31 | 32 | errBrokenFS = testhelper.ErrBrokenFS 33 | ) 34 | 35 | // TODO move this data to some config 36 | var ( 37 | tests = []*test{ 38 | { 39 | locID: borges.LocationID("a6c64c655d15afda789f8138b83213782b6f77c7"), 40 | repoIDs: []borges.RepositoryID{ 41 | 
borges.RepositoryID("github.com/prakhar1989/awesome-courses"), 42 | borges.RepositoryID("github.com/Leo-xxx/awesome-courses"), 43 | borges.RepositoryID("github.com/manjunath00/awesome-courses"), 44 | }, 45 | }, 46 | { 47 | locID: borges.LocationID("fe83b066a45d859cd40cbf512c4ec20351c4f9d9"), 48 | repoIDs: []borges.RepositoryID{ 49 | borges.RepositoryID("github.com/MunGell/awesome-for-beginners"), 50 | borges.RepositoryID("github.com/dhruvil1514/awesome-for-beginners"), 51 | borges.RepositoryID("github.com/karellism/awesome-for-beginners"), 52 | }, 53 | }, 54 | { 55 | locID: borges.LocationID("1880dc904e1b2774be9c97a7b85efabdb910f974"), 56 | repoIDs: []borges.RepositoryID{ 57 | borges.RepositoryID("github.com/jtleek/datasharing"), 58 | borges.RepositoryID("github.com/diptadhi/datasharing"), 59 | borges.RepositoryID("github.com/nmorr041/datasharing"), 60 | }, 61 | }, 62 | { 63 | locID: borges.LocationID("3974996807a9f596cf25ac3a714995c24bb97e2c"), 64 | repoIDs: []borges.RepositoryID{ 65 | borges.RepositoryID("github.com/rtyley/small-test-repo"), 66 | borges.RepositoryID("github.com/kuldeep992/small-test-repo"), 67 | borges.RepositoryID("github.com/kuldeep-singh-blueoptima/small-test-repo"), 68 | }, 69 | }, 70 | { 71 | locID: borges.LocationID("6671f3b1147324f4fb1fbbe2aba843031738f59e"), 72 | repoIDs: []borges.RepositoryID{ 73 | borges.RepositoryID("github.com/enaqx/awesome-pentest"), 74 | borges.RepositoryID("github.com/Inter1292/awesome-pentest"), 75 | borges.RepositoryID("github.com/apelsin83/awesome-pentest"), 76 | }, 77 | }, 78 | { 79 | locID: borges.LocationID("cce60e1b6fb7ad56d07cbcaee7a62030f7d01777"), 80 | repoIDs: []borges.RepositoryID{ 81 | borges.RepositoryID("github.com/kahun/awesome-sysadmin"), 82 | borges.RepositoryID("github.com/apoliukh/awesome-sysadmin"), 83 | borges.RepositoryID("github.com/gauravaristocrat/awesome-sysadmin"), 84 | }, 85 | }, 86 | { 87 | locID: borges.LocationID("f2cee90acf3c6644d51a37057845b98ab1580932"), 88 | repoIDs: 
[]borges.RepositoryID{ 89 | borges.RepositoryID("github.com/jtoy/awesome-tensorflow"), 90 | borges.RepositoryID("github.com/SiweiLuo/awesome-tensorflow"), 91 | borges.RepositoryID("github.com/youtang1993/awesome-tensorflow"), 92 | }, 93 | }, 94 | } 95 | 96 | repoIDsFlat = getRepoIDsFlat() 97 | 98 | testPrivateRepo = &test{ 99 | locID: borges.LocationID("2ea758d7c7cbc249acfd6fc4a67f926cae28c10e"), 100 | repoIDs: []borges.RepositoryID{ 101 | borges.RepositoryID("github.com/lwsanty/super-private-privacy-keep-out"), 102 | }, 103 | } 104 | ) 105 | 106 | type test struct { 107 | locID borges.LocationID 108 | repoIDs []borges.RepositoryID 109 | } 110 | 111 | /* TODO: 112 | - network errors 113 | - resolveCommit https://codecov.io/gh/src-d/gitcollector/compare/4dc597c03b4a5106fbf3ba87f35835cf171fb8ee...1310d5b5c391847255f4227eb1746a31871418a3/src/downloader/git.go#L164 114 | */ 115 | 116 | func TestAll(t *testing.T) { 117 | h, close, err := testhelper.NewHelper() 118 | defer close() 119 | require.NoError(t, err) 120 | 121 | for _, tst := range []struct { 122 | name string 123 | tFunc func(t *testing.T, h *testhelper.Helper) 124 | }{ 125 | {"testLibraryCreationFailed", testLibraryCreationFailed}, 126 | {"testFSFailedStatFail", testFSFailedStatFail}, 127 | {"testFSFailedOpenFileFail", testFSFailedOpenFileFail}, 128 | {"testAuthSuccess", testAuthSuccess}, 129 | {"testAuthErrors", testAuthErrors}, 130 | {"testContextCancelledFail", testContextCancelledFail}, 131 | {"testContextCancelledPrepareRepo", testContextCancelledPrepareRepo}, 132 | {"testWrongEndpointFail", testWrongEndpointFail}, 133 | {"testAlreadyDownloadedFail", testAlreadyDownloadedFail}, 134 | {"testDownloadConcurrentSuccess", testDownloadConcurrentSuccess}, 135 | {"testPeriodicallyBrokenGithubAPI", testPeriodicallyBrokenGithubAPI}, 136 | } { 137 | tst := tst 138 | t.Run(tst.name, func(t *testing.T) { 139 | tst.tFunc(t, h) 140 | close() 141 | }) 142 | } 143 | } 144 | 145 | // testLibraryCreationFailed 146 | // 1) 
try to create *siva.NewLibrary using fs with broken OpenFile method 147 | // error that contains broken fs mocked error's text 148 | func testLibraryCreationFailed(t *testing.T, h *testhelper.Helper) { 149 | testFS := testhelper.NewBrokenFS(h.FS, testhelper.BrokenFSOptions{FailedOpen: true}) 150 | _, err := newLibrary(testFS) 151 | require.Error(t, err) 152 | require.Contains(t, err.Error(), errBrokenFS.Error()) 153 | } 154 | 155 | // testFSFailedStatFail 156 | // 1) try to execute download job using fs with broken Stats method 157 | // error that contains broken fs mocked error's text 158 | func testFSFailedStatFail(t *testing.T, h *testhelper.Helper) { 159 | testFSWithErrors(t, h, testhelper.BrokenFSOptions{FailedStat: true}) 160 | } 161 | 162 | // testFSFailedOpenFileFail 163 | // 1) try to execute download job using fs with broken OpenFile method 164 | // error that contains broken fs mocked error's text 165 | func testFSFailedOpenFileFail(t *testing.T, h *testhelper.Helper) { 166 | testFSWithErrors(t, h, testhelper.BrokenFSOptions{FailedOpenFile: true}) 167 | } 168 | 169 | func testFSWithErrors(t *testing.T, h *testhelper.Helper, fsOpts testhelper.BrokenFSOptions) { 170 | testFS := testhelper.NewBrokenFS(h.FS, fsOpts) 171 | lib, err := newLibrary(testFS) 172 | require.NoError(t, err) 173 | 174 | testRepo := tests[0].repoIDs[0] 175 | job := &library.Job{ 176 | Lib: lib, 177 | Type: library.JobDownload, 178 | TempFS: h.TempFS, 179 | AuthToken: func(string) string { return "" }, 180 | Logger: log.New(nil), 181 | } 182 | job.SetEndpoints([]string{endPoint(gitProtocol, testRepo)}) 183 | 184 | err = Download(context.Background(), job) 185 | require.Error(t, err) 186 | require.Contains(t, err.Error(), errBrokenFS.Error()) 187 | } 188 | 189 | // testAuthSuccess 190 | // 1) try to execute download job of a private repo with valid token via https protocol 191 | // error: nil 192 | func testAuthSuccess(t *testing.T, h *testhelper.Helper) { 193 | t.Skip("skip this test 
until separate org is created") 194 | 195 | token := os.Getenv("GITHUB_TOKEN") 196 | if token == "" { 197 | t.Skip() 198 | } 199 | 200 | job := &library.Job{ 201 | Lib: h.Lib, 202 | Type: library.JobDownload, 203 | TempFS: h.TempFS, 204 | AuthToken: func(string) string { return token }, 205 | Logger: log.New(nil), 206 | } 207 | job.SetEndpoints([]string{endPoint(httpsProtocol, testPrivateRepo.repoIDs[0])}) 208 | 209 | require.NoError(t, Download(context.Background(), job)) 210 | } 211 | 212 | // testAuthErrors 213 | // 1) try to execute download job of a private repo with corrupted token via git protocol 214 | // error: invalid auth method 215 | // 2) try to execute download job with corrupted token via https protocol 216 | // error: authentication required 217 | func testAuthErrors(t *testing.T, h *testhelper.Helper) { 218 | getJob := func(p protocol) *library.Job { 219 | job := &library.Job{ 220 | Lib: h.Lib, 221 | Type: library.JobDownload, 222 | TempFS: h.TempFS, 223 | AuthToken: func(string) string { return "42" }, 224 | Logger: log.New(nil), 225 | } 226 | job.SetEndpoints([]string{endPoint(p, testPrivateRepo.repoIDs[0])}) 227 | 228 | return job 229 | } 230 | 231 | ctx := context.Background() 232 | require.Equal(t, transport.ErrInvalidAuthMethod, Download(ctx, getJob(gitProtocol))) 233 | require.Equal(t, transport.ErrAuthenticationRequired, Download(ctx, getJob(httpsProtocol))) 234 | } 235 | 236 | // testContextCancelledFail 237 | // 1) prepare context and cancel it 238 | // 2) start download try to execute download job with canceled context passed 239 | // error: context canceled 240 | func testContextCancelledFail(t *testing.T, h *testhelper.Helper) { 241 | ctx, cancel := context.WithCancel(context.Background()) 242 | cancel() 243 | 244 | testRepo := tests[0].repoIDs[0] 245 | job := &library.Job{ 246 | Lib: h.Lib, 247 | Type: library.JobDownload, 248 | TempFS: h.TempFS, 249 | AuthToken: func(string) string { return "" }, 250 | Logger: log.New(nil), 251 | } 
252 | job.SetEndpoints([]string{endPoint(gitProtocol, testRepo)}) 253 | 254 | require.Equal(t, context.Canceled, Download(ctx, job)) 255 | } 256 | 257 | // testContextCancelledPrepareRepo 258 | // 1) tries to prepare a repository with a cancelled context. Previously this 259 | // caused a race condition now it should be correct. 260 | func testContextCancelledPrepareRepo(t *testing.T, h *testhelper.Helper) { 261 | ctx, cancel := context.WithCancel(context.Background()) 262 | cancel() 263 | 264 | testRepo := tests[0].repoIDs[0] 265 | repo, err := PrepareRepository(ctx, h.Lib, "location", testRepo, 266 | endPoint(gitProtocol, testRepo), h.TempFS, "tmp") 267 | require.Error(t, err) 268 | require.Nil(t, repo) 269 | } 270 | 271 | // testWrongEndpointFail 272 | // 1) try to execute download job with corrupted endpoint to the repo 273 | // returned error should have type *net.OpError 274 | // error should contain "no such host" 275 | func testWrongEndpointFail(t *testing.T, h *testhelper.Helper) { 276 | const corruptedEndpoint = "git://42.git" 277 | 278 | job := &library.Job{ 279 | Lib: h.Lib, 280 | Type: library.JobDownload, 281 | TempFS: h.TempFS, 282 | AuthToken: func(string) string { return "" }, 283 | Logger: log.New(nil), 284 | } 285 | job.SetEndpoints([]string{corruptedEndpoint}) 286 | 287 | err := Download(context.Background(), job) 288 | require.Error(t, err) 289 | 290 | e, ok := err.(*net.OpError) 291 | if !ok { 292 | t.Fatal("received error " + err.Error() + " is not *net.OpError") 293 | } 294 | require.Contains(t, e.Err.Error(), "no such host") 295 | } 296 | 297 | // testAlreadyDownloadedFail 298 | // 1) exec download job for a test repo 299 | // 2) try to download it again 300 | // error: already downloaded 301 | func testAlreadyDownloadedFail(t *testing.T, h *testhelper.Helper) { 302 | testRepo := tests[0].repoIDs[0] 303 | job := &library.Job{ 304 | Lib: h.Lib, 305 | Type: library.JobDownload, 306 | TempFS: h.TempFS, 307 | AuthToken: func(string) string { 
return "" }, 308 | Logger: log.New(nil), 309 | } 310 | job.SetEndpoints([]string{endPoint(gitProtocol, testRepo)}) 311 | 312 | ctx := context.Background() 313 | require.NoError(t, Download(ctx, job)) 314 | require.True(t, ErrRepoAlreadyExists.Is(Download(ctx, job))) 315 | } 316 | 317 | // testDownloadConcurrentSuccess 318 | // 1) start several download jobs for several orgs 319 | // 2) for each org 320 | // 2.1) get location by id 321 | // error: nil 322 | // repositories ids match the initial ones 323 | func testDownloadConcurrentSuccess(t *testing.T, h *testhelper.Helper) { 324 | errs := concurrentDownloads(h, gitProtocol) 325 | for err := range errs { 326 | require.NoError(t, err) 327 | } 328 | 329 | for _, test := range tests { 330 | t.Run(string(test.locID), func(t *testing.T) { 331 | loc, err := h.Lib.Location(test.locID) 332 | require.NoError(t, err) 333 | 334 | iter, err := loc.Repositories(borges.ReadOnlyMode) 335 | require.NoError(t, err) 336 | 337 | var repoIDs []borges.RepositoryID 338 | require.NoError(t, iter.ForEach(func(r borges.Repository) error { 339 | repoIDs = append(repoIDs, r.ID()) 340 | return nil 341 | })) 342 | 343 | require.ElementsMatch(t, test.repoIDs, repoIDs) 344 | }) 345 | } 346 | } 347 | 348 | // testPeriodicallyBrokenGithubAPI 349 | // 1) set up https proxy that returns 500 response once in several requests 350 | // 2) start several download jobs for several orgs in parallel 351 | // check that part of jobs failed with corresponding error 352 | // check that another part of jobs was successfully downloaded 353 | func testPeriodicallyBrokenGithubAPI(t *testing.T, h *testhelper.Helper) { 354 | if runtime.GOOS == "darwin" { 355 | t.Skip("cannot run these tests on osx") 356 | } 357 | 358 | const failEach = 5 359 | 360 | healthyTransport := http.DefaultTransport 361 | defer func() { http.DefaultTransport = healthyTransport }() 362 | 363 | proxy, err := testutils.NewProxy( 364 | healthyTransport, 365 | &testutils.Options{ 366 | 
FailEachNthRequest: failEach, 367 | FailEachNthCode: http.StatusInternalServerError, 368 | KeyPath: "../_testdata/server.key", 369 | PemPath: "../_testdata/server.pem", 370 | }) 371 | require.NoError(t, err) 372 | 373 | require.NoError(t, proxy.Start()) 374 | defer func() { proxy.Stop() }() 375 | 376 | require.NoError(t, proxy.SetTransportProxy()) 377 | 378 | errs := concurrentDownloads(h, httpsProtocol) 379 | var failedCounter int 380 | blackListRepoIDs := make(map[borges.RepositoryID]struct{}) 381 | for err := range errs { 382 | if err != nil { 383 | if strings.Contains(err.Error(), "EOF") { 384 | require.Contains(t, err.Error(), "EOF") 385 | } else { 386 | require.Contains(t, err.Error(), "Internal Server Error") 387 | } 388 | log.Infof("error: %q", err.Error()) 389 | blackListRepoIDs[getRepoIDFromErrorText(err.Error())] = struct{}{} 390 | failedCounter++ 391 | } 392 | } 393 | require.True(t, failedCounter >= failEach || failedCounter < len(repoIDsFlat), "act: %v", failedCounter) 394 | 395 | for _, test := range tests { 396 | t.Run(string(test.locID), func(t *testing.T) { 397 | loc, err := h.Lib.Location(test.locID) 398 | require.NoError(t, err) 399 | 400 | var expRepoIDs []borges.RepositoryID 401 | for _, rid := range test.repoIDs { 402 | if _, ok := blackListRepoIDs[rid]; !ok { 403 | expRepoIDs = append(expRepoIDs, rid) 404 | } 405 | } 406 | 407 | iter, err := loc.Repositories(borges.ReadOnlyMode) 408 | require.NoError(t, err) 409 | 410 | var actRepoIDs []borges.RepositoryID 411 | require.NoError(t, iter.ForEach(func(r borges.Repository) error { 412 | actRepoIDs = append(actRepoIDs, r.ID()) 413 | return nil 414 | })) 415 | 416 | require.ElementsMatch(t, expRepoIDs, actRepoIDs) 417 | }) 418 | } 419 | } 420 | 421 | func concurrentDownloads(h *testhelper.Helper, p protocol) chan error { 422 | var jobs []*library.Job 423 | for _, test := range tests { 424 | for _, id := range test.repoIDs { 425 | job := &library.Job{ 426 | Lib: h.Lib, 427 | Type: 
library.JobDownload, 428 | TempFS: h.TempFS, 429 | AuthToken: func(string) string { return "" }, 430 | Logger: log.New(nil), 431 | } 432 | job.SetEndpoints([]string{endPoint(p, id)}) 433 | 434 | jobs = append(jobs, job) 435 | } 436 | } 437 | 438 | ctx := context.Background() 439 | 440 | var wg sync.WaitGroup 441 | errs := make(chan error, len(jobs)) 442 | wg.Add(len(jobs)) 443 | for _, job := range jobs { 444 | j := job 445 | go func() { 446 | errs <- Download(ctx, j) 447 | wg.Done() 448 | }() 449 | } 450 | 451 | wg.Wait() 452 | close(errs) 453 | 454 | return errs 455 | } 456 | 457 | // newLibrary is a wrapper around siva.NewLibrary 458 | func newLibrary(fs billy.Filesystem) (*siva.Library, error) { 459 | return siva.NewLibrary("test", fs, &siva.LibraryOptions{ 460 | Bucket: 2, 461 | Transactional: true, 462 | }) 463 | } 464 | 465 | func endPoint(p protocol, repoID interface{}) string { 466 | return fmt.Sprintf("%s://%s.git", p, repoID) 467 | } 468 | 469 | func getRepoIDsFlat() (res []borges.RepositoryID) { 470 | for _, test := range tests { 471 | for _, rid := range test.repoIDs { 472 | res = append(res, rid) 473 | } 474 | } 475 | return 476 | } 477 | 478 | func getRepoIDFromErrorText(text string) borges.RepositoryID { 479 | return borges.RepositoryID(getStringInBetween(text, "https://", ".git")) 480 | } 481 | 482 | func getStringInBetween(str string, start string, end string) (result string) { 483 | s := strings.Index(str, start) 484 | if s == -1 { 485 | return 486 | } 487 | s += len(start) 488 | e := strings.Index(str, end) 489 | return str[s:e] 490 | } 491 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8= 3 | github.com/Azure/go-ansiterm 
v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= 4 | github.com/Microsoft/go-winio v0.4.14 h1:+hMXMk01us9KgxGb7ftKQt2Xpf5hH/yky+TDA+qxleU= 5 | github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA= 6 | github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw= 7 | github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= 8 | github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 h1:uSoVVbwJiQipAclBbw+8quDsfcvFjOpI5iCf4p/cqCs= 9 | github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7/go.mod h1:6zEj6s6u/ghQa61ZWa/C2Aw3RkjiTBOix7dkqa1VLIs= 10 | github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 h1:kFOfPq6dUM1hTo4JG6LR5AXSUEsOjtdm0kw0FtQtMJA= 11 | github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= 12 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= 13 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= 14 | github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= 15 | github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= 16 | github.com/containerd/continuity v0.0.0-20190827140505-75bee3e2ccb6 h1:NmTXa/uVnDyp0TY5MKi197+3HWcnYWfnHGyaFthlnGw= 17 | github.com/containerd/continuity v0.0.0-20190827140505-75bee3e2ccb6/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= 18 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 19 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 20 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 21 | github.com/docker/go-connections v0.4.0 
h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= 22 | github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= 23 | github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= 24 | github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= 25 | github.com/emirpasic/gods v1.9.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= 26 | github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg= 27 | github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= 28 | github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 h1:BHsljHzVlRcyQhjrss6TZTdY2VfCqZPbv5k3iBFa2ZQ= 29 | github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= 30 | github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= 31 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 32 | github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= 33 | github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= 34 | github.com/gliderlabs/ssh v0.1.1 h1:j3L6gSLQalDETeEg/Jg0mGY0/y/N6zI2xX1978P0Uqw= 35 | github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= 36 | github.com/gliderlabs/ssh v0.1.3/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= 37 | github.com/gliderlabs/ssh v0.1.4 h1:5N8AYXpaQAPy0L7linKa5aI+WRfyYagAhjksVzxh+mI= 38 | github.com/gliderlabs/ssh v0.1.4/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= 39 | github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0= 40 | github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= 41 | github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= 42 | github.com/golang/protobuf v1.2.0/go.mod 
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 43 | github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= 44 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 45 | github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= 46 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 47 | github.com/google/go-github/v28 v28.1.1 h1:kORf5ekX5qwXO2mGzXXOjMe/g6ap8ahVe0sBEulhSxo= 48 | github.com/google/go-github/v28 v28.1.1/go.mod h1:bsqJWQX05omyWVmc00nEUql9mhQyv38lDZ8kPZcQVoM= 49 | github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= 50 | github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= 51 | github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= 52 | github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 53 | github.com/gotestyourself/gotestyourself v2.2.0+incompatible h1:AQwinXlbQR2HvPjQZOmDhRqsv5mZf+Jb1RnSLxcqZcI= 54 | github.com/gotestyourself/gotestyourself v2.2.0+incompatible/go.mod h1:zZKM6oeNM8k+FRljX1mnzVYeS8wiGgQyvST1/GafPbY= 55 | github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU= 56 | github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 57 | github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= 58 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 59 | github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= 60 | github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= 61 | github.com/jessevdk/go-flags v1.4.0 h1:4IU2WS7AumrZ/40jfhf4QVDMsQwqA7VEHozFRrGARJA= 62 | github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= 63 | 
github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7 h1:K//n/AqR5HjG3qxbrBCL4vJPW0MVFSs9CPK1OOJdRME= 64 | github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= 65 | github.com/kami-zh/go-capturer v0.0.0-20171211120116-e492ea43421d h1:cVtBfNW5XTHiKQe7jDaDBSh/EVM4XLPutLAGboIXuM0= 66 | github.com/kami-zh/go-capturer v0.0.0-20171211120116-e492ea43421d/go.mod h1:P2viExyCEfeWGU259JnaQ34Inuec4R38JCyBx2edgD0= 67 | github.com/kevinburke/ssh_config v0.0.0-20180830205328-81db2a75821e h1:RgQk53JHp/Cjunrr1WlsXSZpqXn+uREuHvUVcK82CV8= 68 | github.com/kevinburke/ssh_config v0.0.0-20180830205328-81db2a75821e/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= 69 | github.com/kevinburke/ssh_config v0.0.0-20190630040420-2e50c441276c h1:VAx3LRNjVNvjtgO7KFRuT/3aye/0zJvwn01rHSfoolo= 70 | github.com/kevinburke/ssh_config v0.0.0-20190630040420-2e50c441276c/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= 71 | github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= 72 | github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 73 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 74 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 75 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 76 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 77 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 78 | github.com/lib/pq v1.1.1 h1:sJZmqHoEaY7f+NPP8pgLB/WxulyR3fewgCM2qaSlBb4= 79 | github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= 80 | github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU= 81 | github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= 82 | 
github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE= 83 | github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 84 | github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b h1:j7+1HpAFS1zy5+Q4qx1fWh90gTKwiN4QCGoY9TWyyO4= 85 | github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= 86 | github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 87 | github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= 88 | github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 89 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 90 | github.com/onsi/ginkgo v1.8.0 h1:VkHVNpR4iVnU8XQR6DBm8BqYjN7CRzw+xKUbVVbbW9w= 91 | github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 92 | github.com/onsi/gomega v1.5.0 h1:izbySO9zDPmjJ8rDjLvkA2zJHIo+HkYXHnf7eN7SSyo= 93 | github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= 94 | github.com/opencontainers/go-digest v1.0.0-rc1 h1:WzifXhOVOEOuFYOJAW6aQqW0TooG2iki3E3Ii+WN7gQ= 95 | github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= 96 | github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI= 97 | github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= 98 | github.com/opencontainers/runc v0.1.1 h1:GlxAyO6x8rfZYN9Tt0Kti5a/cP41iuiO2yYT0IJGY8Y= 99 | github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= 100 | github.com/ory/dockertest v3.3.5+incompatible h1:iLLK6SQwIhcbrG783Dghaaa3WPzGc+4Emza6EbVUUGA= 101 | github.com/ory/dockertest v3.3.5+incompatible/go.mod h1:1vX4m9wsvi00u5bseYwXaSnhNrne+V0E6LAcBILJdPs= 102 | github.com/pelletier/go-buffruneio v0.2.0 
h1:U4t4R6YkofJ5xHm3dJzuRpPZ0mr5MMCoAWooScCR7aA= 103 | github.com/pelletier/go-buffruneio v0.2.0/go.mod h1:JkE26KsDizTr40EUHkXVtNPvgGtbSNq5BcowyYOWdKo= 104 | github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 105 | github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= 106 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 107 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 108 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 109 | github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ= 110 | github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= 111 | github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= 112 | github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= 113 | github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= 114 | github.com/src-d/envconfig v1.0.0 h1:/AJi6DtjFhZKNx3OB2qMsq7y4yT5//AeSZIe7rk+PX8= 115 | github.com/src-d/envconfig v1.0.0/go.mod h1:Q9YQZ7BKITldTBnoxsE5gOeB5y66RyPXeue/R4aaNBc= 116 | github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4= 117 | github.com/src-d/gcfg v1.4.0/go.mod h1:p/UMsR43ujA89BJY9duynAwIpvqEujIH/jFlfL7jWoI= 118 | github.com/src-d/go-borges v0.0.0-20190704083038-44867e8f2a2a h1:2ZqLC9a4F/pf7Z3tYELgLYdEQxG6jXrw5G1H12dy8dU= 119 | github.com/src-d/go-borges v0.0.0-20190704083038-44867e8f2a2a/go.mod h1:Myl/zHrk3iT/I5T08RTBpuGzchucytSsi6p7KzM2lOA= 120 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 121 | github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 122 | github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= 123 | github.com/stretchr/testify v1.2.2/go.mod 
h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 124 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 125 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 126 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 127 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 128 | github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJzfthRT6usrui8uGmg= 129 | github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE= 130 | github.com/xanzy/ssh-agent v0.2.0 h1:Adglfbi5p9Z0BmK2oKU9nTG+zKfniSfnaMYB+ULd+Ro= 131 | github.com/xanzy/ssh-agent v0.2.0/go.mod h1:0NyE30eGUDliuLEHJgYte/zncp2zdTStcOnWhgSqHD8= 132 | github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70= 133 | github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4= 134 | golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 135 | golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 136 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M= 137 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 138 | golang.org/x/crypto v0.0.0-20190422183909-d864b10871cd/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 139 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 h1:58fnuSXlxZmFdJyvtTFVmVhcMLU6v5fEb/ok4wyqtNU= 140 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 141 | golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc= 142 | golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod 
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 143 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 144 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 145 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e h1:bRhVy7zSSasaqNksaRZiA5EEI+Ei4I1nO5Jh72wfHlg= 146 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 147 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 148 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 h1:0GoQqolDA55aaLxZyTzK/Y2ePZzZTUrRacwib7cNsYQ= 149 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 150 | golang.org/x/net v0.0.0-20190502183928-7f726cade0ab/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 151 | golang.org/x/net v0.0.0-20190607181551-461777fb6f67 h1:rJJxsykSlULwd2P2+pg/rtnwN2FrWp4IuCxOSyS0V00= 152 | golang.org/x/net v0.0.0-20190607181551-461777fb6f67/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 153 | golang.org/x/net v0.0.0-20190628185345-da137c7871d7 h1:rTIdg5QFRR7XCaK4LCjBiPbx8j4DQRpdYMnGn/bJUEU= 154 | golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 155 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 156 | golang.org/x/oauth2 v0.0.0-20190523182746-aaccbc9213b0 h1:xFEXbcD0oa/xhqQmMXztdZ0bWvexAWds+8c1gRN8nu0= 157 | golang.org/x/oauth2 v0.0.0-20190523182746-aaccbc9213b0/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 158 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 159 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw= 160 | golang.org/x/sync 
v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 161 | golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 162 | golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU= 163 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 164 | golang.org/x/sys v0.0.0-20180903190138-2b024373dcd9/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 165 | golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 166 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 167 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= 168 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 169 | golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 170 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 171 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 172 | golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 173 | golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 174 | golang.org/x/sys v0.0.0-20190609082536-301114b31cce/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 175 | golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 h1:LepdCS8Gf/MVejFIt8lsiexZATdoGVyp5bcyS+rYoUI= 176 | golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 177 | golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= 178 | golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 179 | golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= 180 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 181 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 182 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 183 | google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= 184 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 185 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 186 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= 187 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 188 | gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= 189 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 190 | gopkg.in/src-d/go-billy-siva.v4 v4.5.1 h1:+UdpGGmJjANhXwg6TCcTVbACUqsbtX19QvJ9AdeX4ts= 191 | gopkg.in/src-d/go-billy-siva.v4 v4.5.1/go.mod h1:4wKeCzOCSsdyFeM5+58M6ObU6FM+lZT12p7zm7A+9n0= 192 | gopkg.in/src-d/go-billy.v4 v4.2.1/go.mod h1:tm33zBoOwxjYHZIE+OV8bxTWFMJLrconzFMd38aARFk= 193 | gopkg.in/src-d/go-billy.v4 v4.3.0 h1:KtlZ4c1OWbIs4jCv5ZXrTqG8EQocr0g/d4DjNg70aek= 194 | gopkg.in/src-d/go-billy.v4 v4.3.0/go.mod h1:tm33zBoOwxjYHZIE+OV8bxTWFMJLrconzFMd38aARFk= 195 | gopkg.in/src-d/go-billy.v4 v4.3.1 h1:OkK1DmefDy1Z6Veu82wdNj/cLpYORhdX4qdaYCPwc7s= 196 | gopkg.in/src-d/go-billy.v4 v4.3.1/go.mod h1:tm33zBoOwxjYHZIE+OV8bxTWFMJLrconzFMd38aARFk= 197 | gopkg.in/src-d/go-cli.v0 v0.0.0-20190422143124-3a646154da79 h1:MBr8uUjT5gZe4udsLGNZ3nWy/+ck0LSB91mQjgpwLUI= 198 | gopkg.in/src-d/go-cli.v0 v0.0.0-20190422143124-3a646154da79/go.mod 
h1:z+K8VcOYVYcSwSjGebuDL6176A1XskgbtNl64NSg+n8= 199 | gopkg.in/src-d/go-errors.v1 v1.0.0 h1:cooGdZnCjYbeS1zb1s6pVAAimTdKceRrpn7aKOnNIfc= 200 | gopkg.in/src-d/go-errors.v1 v1.0.0/go.mod h1:q1cBlomlw2FnDBDNGlnh6X0jPihy+QxZfMMNxPCbdYg= 201 | gopkg.in/src-d/go-git-fixtures.v3 v3.1.1/go.mod h1:dLBcvytrw/TYZsNTWCnkNF2DSIlzWYqTe3rJR56Ac7g= 202 | gopkg.in/src-d/go-git-fixtures.v3 v3.5.0 h1:ivZFOIltbce2Mo8IjzUHAFoq/IylO9WHhNOAJK+LsJg= 203 | gopkg.in/src-d/go-git-fixtures.v3 v3.5.0/go.mod h1:dLBcvytrw/TYZsNTWCnkNF2DSIlzWYqTe3rJR56Ac7g= 204 | gopkg.in/src-d/go-git.v4 v4.11.0 h1:cJwWgJ0DXifrNrXM6RGN1Y2yR60Rr1zQ9Q5DX5S9qgU= 205 | gopkg.in/src-d/go-git.v4 v4.11.0/go.mod h1:Vtut8izDyrM8BUVQnzJ+YvmNcem2J89EmfZYCkLokZk= 206 | gopkg.in/src-d/go-git.v4 v4.12.0 h1:CKgvBCJCcdfNnyXPYI4Cp8PaDDAmAPEN0CtfEdEAbd8= 207 | gopkg.in/src-d/go-git.v4 v4.12.0/go.mod h1:zjlNnzc1Wjn43v3Mtii7RVxiReNP0fIu9npcXKzuNp4= 208 | gopkg.in/src-d/go-log.v1 v1.0.2 h1:dED4100pntH4l3qOTgD1xebQR6pVU8tuPbUCmqiMsb0= 209 | gopkg.in/src-d/go-log.v1 v1.0.2/go.mod h1:GN34hKP0g305ysm2/hctJ0Y8nWP3zxXXJ8GFabTyABE= 210 | gopkg.in/src-d/go-siva.v1 v1.5.0 h1:WowvbZTlz0SPoV7WNCGktPSi2yRK78HPyXl7wYqDeHE= 211 | gopkg.in/src-d/go-siva.v1 v1.5.0/go.mod h1:tk1jnIXawd/PTlRNWdr5V5lC0PttNJmu1fv7wt7IZlw= 212 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 213 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 214 | gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= 215 | gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= 216 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 217 | gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 218 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 219 | gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= 220 | gopkg.in/yaml.v2 
v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 221 | gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= 222 | gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= 223 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. 
Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 
62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . --------------------------------------------------------------------------------