├── repofilter ├── Makefile ├── Dockerfile └── main.go ├── golang-builder-1.5 ├── Dockerfile ├── build_environment.sh └── build.sh ├── golang-builder-1.6 ├── Dockerfile ├── build_environment.sh └── build.sh ├── golang-builder-1.7 ├── Dockerfile ├── build_environment.sh └── build.sh ├── pipelines └── pipeline.json └── getrepos.go /repofilter/Makefile: -------------------------------------------------------------------------------- 1 | all: compile docker 2 | 3 | compile: 4 | CGO_ENABLED=0 go build -o repofilter 5 | 6 | docker: 7 | sudo docker build --force-rm=true -t dwhitena/repofilter . 8 | -------------------------------------------------------------------------------- /repofilter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:latest 2 | 3 | # FUSE 4 | RUN \ 5 | apt-get update -yq && \ 6 | apt-get install -yq --no-install-recommends \ 7 | git \ 8 | ca-certificates \ 9 | curl \ 10 | fuse && \ 11 | apt-get clean && \ 12 | rm -rf /var/lib/apt 13 | 14 | # Install Pachyderm job-shim 15 | RUN go get github.com/pachyderm/pachyderm && \ 16 | go get github.com/pachyderm/pachyderm/src/server/cmd/job-shim && \ 17 | cp $GOPATH/bin/job-shim /job-shim 18 | 19 | # add the filter binary (created with the Makefile) 20 | ADD repofilter /repofilter 21 | -------------------------------------------------------------------------------- /golang-builder-1.5/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.5 2 | 3 | # FUSE 4 | RUN \ 5 | apt-get update -yq && \ 6 | apt-get install -yq --no-install-recommends \ 7 | git \ 8 | ca-certificates \ 9 | curl \ 10 | fuse && \ 11 | apt-get clean && \ 12 | rm -rf /var/lib/apt 13 | 14 | # Install Pachyderm job-shim 15 | RUN go get github.com/pachyderm/pachyderm && \ 16 | go get github.com/pachyderm/pachyderm/src/server/cmd/job-shim && \ 17 | cp $GOPATH/bin/job-shim /job-shim 18 | 19 | # prepare the build environment 20 | RUN apt-get update && apt-get install -y upx-ucl 21 | RUN go get github.com/pwaller/goupx 22 | 23 | VOLUME /src 24 | WORKDIR /src 25 | 26 | COPY build_environment.sh / 27 | COPY build.sh / 28 | -------------------------------------------------------------------------------- /golang-builder-1.6/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.6 2 | 3 | # FUSE 4 | RUN \ 5 | apt-get update -yq && \ 6 | apt-get install -yq --no-install-recommends \ 7 | git \ 8 | ca-certificates \ 9 | curl \ 10 | fuse && \ 11 | apt-get clean && \ 12 | rm -rf /var/lib/apt 13 | 14 | # Install Pachyderm job-shim 15 | RUN go get github.com/pachyderm/pachyderm && \ 16 | go get github.com/pachyderm/pachyderm/src/server/cmd/job-shim && \ 17 | cp $GOPATH/bin/job-shim /job-shim 18 | 19 | # prepare the build environment 20 | RUN apt-get update && apt-get install -y upx-ucl 21 | RUN go get github.com/pwaller/goupx 22 | 23 | VOLUME /src 24 | WORKDIR /src 25 | 26 | COPY build_environment.sh / 27 | COPY build.sh / 28 | -------------------------------------------------------------------------------- /golang-builder-1.7/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.7 2 | 3 | # FUSE 4 | RUN \ 5 | apt-get update -yq && \ 6 | apt-get install -yq --no-install-recommends \ 7 | git \ 8 | ca-certificates \ 9 | curl \ 10 | fuse && \ 11 | apt-get clean && \ 12 | rm -rf /var/lib/apt 13 | 14 | # Install Pachyderm job-shim 15 | RUN go get github.com/pachyderm/pachyderm && \ 16 | go get github.com/pachyderm/pachyderm/src/server/cmd/job-shim && \ 17 | cp $GOPATH/bin/job-shim /job-shim 18 | 19 | # prepare the build environment 20 | RUN apt-get update && apt-get install -y upx-ucl 21 | RUN go get github.com/pwaller/goupx 22 | 23 | VOLUME /src 24 | WORKDIR /src 25 | 26 | COPY build_environment.sh / 27 | COPY build.sh / 28 | -------------------------------------------------------------------------------- /repofilter/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "io/ioutil" 6 | "log" 7 | "os" 8 | "strconv" 9 | ) 10 | 11 | func main() { 12 | 13 | csvfile, err := os.Open("/pfs/repodata/repodata.csv") 14 | if err != nil { 15 | log.Fatal(err) 16 | } 17 | defer csvfile.Close() 18 | 19 | reader := csv.NewReader(csvfile) 20 | reader.FieldsPerRecord = -1 21 | rawCSVdata, err := reader.ReadAll() 22 | if err != nil { 23 | log.Fatal(err) 24 | } 25 | 26 | count := 0 27 | for _, each := range rawCSVdata { 28 | intString := strconv.Itoa(count) 29 | d1 := []byte(each[0]) 30 | err = ioutil.WriteFile("/pfs/out/"+intString, d1, 0644) 31 | if err != nil { 32 | log.Fatal(err) 33 | } 34 | count++ 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /golang-builder-1.5/build_environment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | tagName=$1 4 | 5 | # Grab the source code 6 | go get -d github.com/$REPONAME/... 7 | 8 | # Grab Go package name 9 | pkgName=github.com/$REPONAME 10 | 11 | if [ -z "$pkgName" ]; 12 | then 13 | echo "Error: Must add package name as env var" 14 | exit 992 15 | fi 16 | 17 | # Grab just first path listed in GOPATH 18 | goPath="${GOPATH%%:*}" 19 | 20 | # Construct Go package path 21 | pkgPath="$goPath/src/$pkgName" 22 | 23 | if [ -e "$pkgPath/vendor" ]; 24 | then 25 | # Enable vendor experiment 26 | export GO15VENDOREXPERIMENT=1 27 | elif [ -e "$pkgPath/Godeps/_workspace" ]; 28 | then 29 | # Add local godeps dir to GOPATH 30 | GOPATH=$pkgPath/Godeps/_workspace:$GOPATH 31 | fi 32 | -------------------------------------------------------------------------------- /golang-builder-1.6/build_environment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | tagName=$1 4 | 5 | # Grab the source code 6 | go get -d github.com/$REPONAME/... 7 | 8 | # Grab Go package name 9 | pkgName=github.com/$REPONAME 10 | 11 | if [ -z "$pkgName" ]; 12 | then 13 | echo "Error: Must add package name as env var" 14 | exit 992 15 | fi 16 | 17 | # Grab just first path listed in GOPATH 18 | goPath="${GOPATH%%:*}" 19 | 20 | # Construct Go package path 21 | pkgPath="$goPath/src/$pkgName" 22 | 23 | if [ -e "$pkgPath/vendor" ]; 24 | then 25 | # Enable vendor experiment 26 | export GO15VENDOREXPERIMENT=1 27 | elif [ -e "$pkgPath/Godeps/_workspace" ]; 28 | then 29 | # Add local godeps dir to GOPATH 30 | GOPATH=$pkgPath/Godeps/_workspace:$GOPATH 31 | fi 32 | -------------------------------------------------------------------------------- /golang-builder-1.7/build_environment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | tagName=$1 4 | 5 | # Grab the source code 6 | go get -d github.com/$REPONAME/... 7 | 8 | # Grab Go package name 9 | pkgName=github.com/$REPONAME 10 | 11 | if [ -z "$pkgName" ]; 12 | then 13 | echo "Error: Must add package name as env var" 14 | exit 992 15 | fi 16 | 17 | # Grab just first path listed in GOPATH 18 | goPath="${GOPATH%%:*}" 19 | 20 | # Construct Go package path 21 | pkgPath="$goPath/src/$pkgName" 22 | 23 | if [ -e "$pkgPath/vendor" ]; 24 | then 25 | # Enable vendor experiment 26 | export GO15VENDOREXPERIMENT=1 27 | elif [ -e "$pkgPath/Godeps/_workspace" ]; 28 | then 29 | # Add local godeps dir to GOPATH 30 | GOPATH=$pkgPath/Godeps/_workspace:$GOPATH 31 | fi 32 | -------------------------------------------------------------------------------- /golang-builder-1.5/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source /build_environment.sh 4 | 5 | # get the number of dependencies in the repo 6 | go list $pkgName/... > dep.log || true 7 | deps=`wc -l dep.log | cut -d' ' -f1`; 8 | 9 | # get number of lines of go code 10 | golines=`( find $pkgPath -name '*.go' -print0 | xargs -0 cat ) | wc -l` 11 | #golines=`find $pkgPath -name '*.go' | xargs wc -l` 12 | 13 | # time the compile of the statically linked version of package 14 | START_TIME=$(date +%s%N) 15 | go build $pkgName/... >>compile.log 2>&1 || true 16 | ELAPSED_TIME=$(($(date +%s%N) - $START_TIME)) 17 | 18 | lines=`wc -l compile.log | awk '{print $1}'`; 19 | if [ $lines -gt 0 ]; 20 | then 21 | echo $REPONAME, "error", $deps, $golines 22 | else 23 | echo $REPONAME, $ELAPSED_TIME, $deps, $golines 24 | fi 25 | 26 | rm compile.log 27 | -------------------------------------------------------------------------------- /golang-builder-1.7/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source /build_environment.sh 4 | 5 | # get the number of dependencies in the repo 6 | go list $pkgName/... > dep.log || true 7 | deps=`wc -l dep.log | cut -d' ' -f1`; 8 | 9 | # get number of lines of go code 10 | golines=`( find $pkgPath -name '*.go' -print0 | xargs -0 cat ) | wc -l` 11 | #golines=`find $pkgPath -name '*.go' | xargs wc -l` 12 | 13 | # time the compile of the statically linked version of package 14 | START_TIME=$(date +%s%N) 15 | go build $pkgName/... >>compile.log 2>&1 || true 16 | ELAPSED_TIME=$(($(date +%s%N) - $START_TIME)) 17 | 18 | lines=`wc -l compile.log | awk '{print $1}'`; 19 | if [ $lines -gt 0 ]; 20 | then 21 | echo $REPONAME, "error", $deps, $golines 22 | else 23 | echo $REPONAME, $ELAPSED_TIME, $deps, $golines 24 | fi 25 | 26 | rm compile.log 27 | -------------------------------------------------------------------------------- /golang-builder-1.6/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source /build_environment.sh 4 | 5 | # get the number of dependencies in the repo 6 | go list $pkgName/... > dep.log || true 7 | deps=`wc -l dep.log | cut -d' ' -f1`; 8 | 9 | # get number of lines of go code 10 | golines=`( find $pkgPath -name '*.go' -print0 | xargs -0 cat ) | wc -l` 11 | 12 | # time the compile of the statically linked version of package 13 | START_TIME=$(date +%s%N) 14 | go build $pkgName/... >>compile.log 2>&1 || true 15 | ELAPSED_TIME=$(($(date +%s%N) - $START_TIME)) 16 | 17 | lines=`wc -l compile.log | awk '{print $1}'`; 18 | if [ $lines -gt 0 ]; 19 | then 20 | echo $REPONAME, "error", $deps, $golines 21 | else 22 | echo $REPONAME, $ELAPSED_TIME, $deps, $golines 23 | fi 24 | 25 | # clean up 26 | rm compile.log 27 | rm dep.log 28 | rm -r $GOPATH/src/* 29 | rm -r $GOPATH/bin/* 30 | rm -r /src/* 31 | -------------------------------------------------------------------------------- /pipelines/pipeline.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": { 3 | "name": "filter" 4 | }, 5 | "transform": { 6 | "image": "dwhitena/repofilter", 7 | "cmd": [ "/repofilter" ], 8 | "stdin": [] 9 | }, 10 | "parallelism": "1", 11 | "inputs": [ 12 | { 13 | "repo": { 14 | "name": "repodata" 15 | }, 16 | "method": "map" 17 | } 18 | ] 19 | } 20 | { 21 | "pipeline": { 22 | "name": "compile16" 23 | }, 24 | "transform": { 25 | "image": "dwhitena/golangbuilder16", 26 | "cmd": [ "/bin/bash" ], 27 | "stdin": [ 28 | "for filename in /pfs/filter/*; do", 29 | "REPONAME=`cat $filename`", 30 | "new_name=`basename $filename`", 31 | "source /build.sh > /pfs/out/$new_name", 32 | "done" 33 | ] 34 | }, 35 | "parallelism": "1", 36 | "inputs": [ 37 | { 38 | "repo": { 39 | "name": "filter" 40 | }, 41 | "method": "map" 42 | } 43 | ] 44 | } 45 | -------------------------------------------------------------------------------- /getrepos.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math" 7 | "time" 8 | 9 | "github.com/google/go-github/github" 10 | ) 11 | 12 | const ( 13 | REMAINING_THRESHOLD = 1 14 | ) 15 | 16 | func main() { 17 | 18 | client := github.NewClient(nil) 19 | 20 | // start time 21 | t1 := time.Date(2013, time.January, 1, 0, 0, 0, 0, time.UTC) 22 | 23 | for t1.Unix() < time.Now().Unix() { 24 | 25 | t2 := t1.Add(time.Hour * 24 * 2) 26 | tString := fmt.Sprintf("\"%d-%02d-%02d .. %d-%02d-%02d\"", 27 | t1.Year(), t1.Month(), t1.Day(), 28 | t2.Year(), t2.Month(), t2.Day()) 29 | 30 | query := fmt.Sprintf("language:Go created:" + tString) 31 | 32 | page := 1 33 | maxPage := math.MaxInt32 34 | 35 | opts := &github.SearchOptions{ 36 | Sort: "stars", 37 | Order: "desc", 38 | ListOptions: github.ListOptions{ 39 | PerPage: 100, 40 | }, 41 | } 42 | 43 | for page <= maxPage { 44 | opts.Page = page 45 | result, response, err := client.Search.Repositories(query, opts) 46 | Wait(response) 47 | 48 | if err != nil { 49 | log.Fatal("FindRepos:", err) 50 | } 51 | 52 | maxPage = response.LastPage 53 | 54 | msg := fmt.Sprintf("query: %s, page: %v/%v, size: %v, total: %v", 55 | tString, page, maxPage, len(result.Repositories), *result.Total) 56 | log.Println(msg) 57 | 58 | for _, repo := range result.Repositories { 59 | 60 | name := *repo.FullName 61 | updated_at := repo.UpdatedAt.String() 62 | created_at := repo.CreatedAt.String() 63 | forks := *repo.ForksCount 64 | issues := *repo.OpenIssuesCount 65 | stars := *repo.StargazersCount 66 | size := *repo.Size 67 | 68 | fmt.Printf("%s,%s,%s,%d,%d,%d,%d\n", 69 | name, updated_at, created_at, forks, issues, stars, size) 70 | 71 | } 72 | 73 | time.Sleep(time.Second * 10) 74 | page++ 75 | 76 | } 77 | 78 | t1 = t1.Add(time.Hour * 24 * 2) 79 | 80 | } 81 | 82 | } 83 | 84 | func Wait(response *github.Response) { 85 | if response != nil && response.Remaining <= REMAINING_THRESHOLD { 86 | gap := time.Duration(response.Reset.Local().Unix() - time.Now().Unix()) 87 | sleep := gap * time.Second 88 | if sleep < 0 { 89 | sleep = -sleep 90 | } 91 | 92 | time.Sleep(sleep) 93 | } 94 | } 95 | --------------------------------------------------------------------------------