├── .gitignore ├── .travis.yml ├── Makefile ├── verbose.go ├── cmd ├── progress.go ├── godl_test.go └── godl.go ├── LICENSE ├── README.md ├── downloader_test.go └── downloader.go /.gitignore: -------------------------------------------------------------------------------- 1 | godl 2 | *.part 3 | *.txt 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | install: 4 | - go get github.com/sethgrid/multibar 5 | - go get github.com/hydrogen18/stoppableListener 6 | - make test 7 | 8 | go: 9 | - tip 10 | 11 | notifications: 12 | email: false 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | all: 4 | go install 5 | go build cmd/godl.go cmd/progress.go 6 | 7 | test: all 8 | @set -e; \ 9 | STATUS=0; \ 10 | go test || STATUS=$$?; \ 11 | go test ./cmd || STATUS=$$?; \ 12 | exit $$STATUS; \ 13 | 14 | clean: 15 | rm godl 16 | -------------------------------------------------------------------------------- /verbose.go: -------------------------------------------------------------------------------- 1 | package multipartdownloader 2 | 3 | import ( 4 | "log" 5 | ) 6 | 7 | var verbose = false 8 | 9 | // Verbose logging utility 10 | func logVerbose(e ...interface{}) { 11 | if (verbose) { 12 | log.Print(e...) 13 | } 14 | } 15 | 16 | // Set verbosity for all log actions 17 | func SetVerbose(verb bool) { 18 | verbose = verb 19 | } 20 | 21 | -------------------------------------------------------------------------------- /cmd/progress.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | md "github.com/alvatar/multipart-downloader" 6 | "github.com/sethgrid/multibar" 7 | ) 8 | 9 | // Progress type 10 | type progress struct { 11 | progressBars *multibar.BarContainer 12 | } 13 | 14 | // Setup progress visualization 15 | func NewProgress(chunks []md.Chunk) (prog *progress) { 16 | numChunks := len(chunks) 17 | pBars, _ := multibar.New() 18 | 19 | prog = &progress{ 20 | progressBars: pBars, 21 | } 22 | 23 | for i := 0; i < numChunks; i++ { 24 | prog.progressBars.MakeBar(int(chunks[i].End - chunks[i].Begin), fmt.Sprintf("%2d:", i+1)) 25 | } 26 | 27 | go prog.progressBars.Listen() 28 | 29 | return 30 | } 31 | 32 | // Update values from connections progress 33 | func (prog *progress) Update(progressArray []md.ConnectionProgress) { 34 | for i := 0; i < len(progressArray); i++ { 35 | relativeProgress := int(progressArray[i].Current - progressArray[i].Begin) 36 | prog.progressBars.Bars[i].Update(relativeProgress) 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright © 2015 Alvaro Castro-Castilla 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /cmd/godl_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "os/exec" 6 | "testing" 7 | ) 8 | 9 | func TestNoArgs (t *testing.T) { 10 | cmd := exec.Command("../godl") 11 | err := cmd.Run() 12 | if err == nil { // exit code 0 13 | t.Error("Running godl without arguments should exit with error") 14 | } 15 | } 16 | 17 | func TestNoUrls (t *testing.T) { 18 | cmd := exec.Command("../godl", "-n", "10") 19 | err := cmd.Run() 20 | if err == nil { // exit code 0 21 | t.Error("Running godl without an URL should exit with error") 22 | } 23 | } 24 | 25 | func TestWrongUrl (t *testing.T) { 26 | cmd := exec.Command("../godl", "http://example.com/nothing") 27 | err := cmd.Run() 28 | if err == nil { // exit code 0 29 | t.Error("Running godl with a wrong URL should exit with error") 30 | } 31 | } 32 | 33 | func TestUrl (t *testing.T) { 34 | cmd := exec.Command("../godl", "-o", "tmp_file", "https://raw.githubusercontent.com/alvatar/multipart-downloader/master/LICENSE") 35 | err := cmd.Run() 36 | if err != nil { 37 | t.Error("Running godl with -o output_file and an URL should be successful") 38 | return 39 | } 40 | if _, err := os.Stat("tmp_file"); os.IsNotExist(err) { 41 | t.Error("The file wasn't properly downloaded") 42 | return 43 | } 44 | os.Remove("tmp_file") 45 | } 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multi-part file downloader 2 | 3 | Download a file with multiple connections and multiple sources simultaneously. 4 | 5 | [![Build Status](https://travis-ci.org/alvatar/multipart-downloader.svg?branch=master)](https://travis-ci.org/alvatar/multipart-downloader) [![Doc Status](https://godoc.org/github.com/alvatar/multipart-downloader?status.png)](https://godoc.org/github.com/alvatar/multipart-downloader) 6 | 7 | 8 | ## Installation 9 | 10 | # Build the executable 11 | make 12 | 13 | # Install as library 14 | go get github.com/alvatar/multipart-downloader 15 | 16 | ## Usage 17 | 18 | godl [flags ...] [urls ...] 19 | 20 | Flags: 21 | -n Number of concurrent connections 22 | -S A SHA-256 string to check the downloaded file 23 | -E Verify using Etag as MD5 24 | -t Timeout for all connections in milliseconds (default 5000) 25 | -o Output file 26 | -v Verbose output, show progress bars 27 | 28 | ## Usage as library 29 | 30 | ```go 31 | urls := []string{ 32 | "https://raw.githubusercontent.com/alvatar/multipart-downloader/master/test/quijote.txt", 33 | "https://raw.githubusercontent.com/alvatar/multipart-downloader/master/test/quijote2.txt",} 34 | nConns := 2 35 | timeout := time.Duration(5000) * time.Millisecond 36 | dldr := md.NewMultiDownloader(urls, nConns, timeout) 37 | 38 | // Gather info from all sources 39 | _, err := dldr.GatherInfo() 40 | 41 | // Prepare the file to write downloaded blocks on it 42 | _, err = dldr.SetupFile(*output) 43 | 44 | // Perform download 45 | err = dldr.Download(func(feedback []md.ConnectionProgress) { 46 | log.Println(feedback) 47 | }) 48 | 49 | err = dldr.CheckSHA256("1e9bb1b16f8810e44d6d5ede7005258518fa976719bc2ed254308e73c357cfcc") 50 | err = dldr.CheckMD5("45bb5fc96bb4c67778d288fba98eee48") 51 | ``` -------------------------------------------------------------------------------- /cmd/godl.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "log" 6 | "os" 7 | "os/signal" 8 | "syscall" 9 | "time" 10 | md "github.com/alvatar/multipart-downloader" 11 | ) 12 | 13 | var ( 14 | nConns = flag.Uint("n", 1, "Number of concurrent connections") 15 | sha256 = flag.String("S", "", "File containing SHA-256 hash, or a SHA-256 string") 16 | useEtag = flag.Bool("E", false, "Verify using ETag as MD5") 17 | timeout = flag.Uint("t", 5000, "Timeout for all connections in milliseconds") 18 | output = flag.String("o", "", "Output file") 19 | verbose = flag.Bool("v", false, "Verbose output") 20 | ) 21 | 22 | func exitOnError(err error) { 23 | if err != nil { 24 | log.Fatal(err) 25 | os.Exit(1) 26 | } 27 | } 28 | 29 | func main() { 30 | flag.Parse() 31 | log.SetPrefix("godl: ") 32 | if len(flag.Args()) == 0 { 33 | log.Fatal("No URLs provided") 34 | os.Exit(1) 35 | } 36 | 37 | // Register signals 38 | sigc := make(chan os.Signal, 1) 39 | signal.Notify(sigc, 40 | os.Interrupt, 41 | syscall.SIGHUP, 42 | syscall.SIGINT, 43 | syscall.SIGTERM, 44 | syscall.SIGQUIT) 45 | go func() { 46 | <-sigc 47 | log.Fatal("Exit with incomplete download") 48 | os.Exit(1) 49 | }() 50 | 51 | if *verbose { 52 | log.Println("Initializing download with", *nConns, "concurrent connections") 53 | } 54 | 55 | // Initialize download 56 | dldr := md.NewMultiDownloader(flag.Args(), int(*nConns), time.Duration(*timeout) * time.Millisecond) 57 | md.SetVerbose(*verbose) 58 | 59 | // Gather info from all sources 60 | chunks, err := dldr.GatherInfo() 61 | exitOnError(err) 62 | 63 | // Prepare the file to write individual blocks on 64 | _, err = dldr.SetupFile(*output) 65 | exitOnError(err) 66 | 67 | // Perform download 68 | if *verbose { 69 | // Setup bar visualization 70 | v := NewProgress(chunks) 71 | err = dldr.Download(func(feedback []md.ConnectionProgress) { 72 | v.Update(feedback) 73 | }) 74 | } else { 75 | err = dldr.Download(nil) 76 | } 77 | exitOnError(err) 78 | 79 | // Perform SHA256 check if requested 80 | if *sha256 != "" { 81 | err := dldr.CheckSHA256(*sha256) 82 | exitOnError(err) 83 | if err != nil { 84 | log.Fatal(err) 85 | os.Exit(1) 86 | } else if *verbose { 87 | log.Println("SHA-256 checked successfully") 88 | } 89 | } 90 | 91 | // Perform MD5SUM from ETag if requested 92 | if *useEtag { 93 | err := dldr.CheckMD5(dldr.ETag) 94 | exitOnError(err) 95 | if err != nil { 96 | log.Fatal(err) 97 | os.Exit(1) 98 | } else if *verbose { 99 | log.Println("MD5SUM checked successfully") 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /downloader_test.go: -------------------------------------------------------------------------------- 1 | package multipartdownloader 2 | 3 | import ( 4 | "bytes" 5 | "net" 6 | "net/http" 7 | "io/ioutil" 8 | "log" 9 | "os" 10 | "reflect" 11 | "testing" 12 | "time" 13 | 14 | "github.com/hydrogen18/stoppableListener" 15 | ) 16 | 17 | func failOnError (t *testing.T, err error) { 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | } 22 | 23 | // MultiDownloader.GatherInfo() test 24 | // NOTE: this test will fail if the file LICENSE diverges from the repository 25 | func TestGatherInfo (t *testing.T) { 26 | // Gather remote sources info 27 | urls := []string{"https://raw.githubusercontent.com/alvatar/multipart-downloader/master/LICENSE"} 28 | dldr := NewMultiDownloader(urls, 1, time.Duration(5000) * time.Millisecond) 29 | _, err := dldr.GatherInfo() 30 | failOnError(t, err) 31 | 32 | // Get the local file info and test if they match 33 | file, err := os.Open("LICENSE") // For read access. 34 | failOnError(t, err) 35 | stat, err := file.Stat() 36 | failOnError(t, err) 37 | if stat.Size() != dldr.fileLength { 38 | t.Error("Remote and reference local file sizes do not match") 39 | } 40 | } 41 | 42 | // MultiDownloader.SetupFile() test 43 | func TestSetupFile (t *testing.T) { 44 | // Gather remote sources info 45 | urls := []string{"https://raw.githubusercontent.com/alvatar/multipart-downloader/master/LICENSE"} 46 | dldr := NewMultiDownloader(urls, 1, time.Duration(5000) * time.Millisecond) 47 | _, err := dldr.GatherInfo() 48 | failOnError(t, err) 49 | 50 | // Create tmp file with custom name 51 | testFileName := "___testFile___" 52 | localFileInfo, err := dldr.SetupFile(testFileName) 53 | failOnError(t, err) 54 | // Remove the tmp file 55 | defer func() { 56 | err = os.Remove(dldr.partFilename) 57 | failOnError(t, err) 58 | }() 59 | if localFileInfo.Size() != dldr.fileLength { 60 | t.Error("Downloaded and created local file sizes do not match") 61 | } 62 | } 63 | 64 | func TestUrlToFilename (t *testing.T) { 65 | testTable := []struct { 66 | url string 67 | filename string 68 | } { 69 | {"https://raw.githubusercontent.com/alvatar/multipart-downloader/master/LICENSE", 70 | "LICENSE"}, 71 | {"https://kernel.org/pub/linux/kernel/v4.x/linux-4.0.tar.xz", 72 | "linux-4.0.tar.xz"}, 73 | {"https://kernel.org/pub/linux/kernel/v4.x/linux-4.0.tar.xz#frag-test", 74 | "linux-4.0.tar.xz"}, 75 | {"https://kernel.org/pub/linux/kernel/v4.x/linux-4.0.tar.xz?type=animal&name=narwhal#nose", 76 | "linux-4.0.tar.xz"}, 77 | } 78 | 79 | for _, test := range testTable { 80 | if urlToFilename(test.url) != test.filename { 81 | t.Fail() 82 | } 83 | } 84 | } 85 | 86 | func TestBuildChunks (t *testing.T) { 87 | testTable := []struct { 88 | fileLength int64 89 | nConns int 90 | chunks []Chunk 91 | } { 92 | {125, 1, []Chunk{{0, 125},}}, 93 | {125, 2, []Chunk{{0, 63}, {63, 125},}}, 94 | {125, 3, []Chunk{{0, 42}, {42, 84}, {84, 125},}}, 95 | {125, 4, []Chunk{{0, 32}, {32, 63}, {63, 94}, {94, 125},}}, 96 | } 97 | for _, test := range testTable { 98 | urls := []string{"https://raw.githubusercontent.com/alvatar/multipart-downloader/master/LICENSE"} 99 | dldr := NewMultiDownloader(urls, test.nConns, time.Duration(1)) 100 | dldr.fileLength = test.fileLength 101 | dldr.buildChunks() 102 | if !reflect.DeepEqual(dldr.chunks, test.chunks) { 103 | log.Println("Should be:", test.chunks) 104 | log.Println("Result is:", dldr.chunks) 105 | t.Fail() 106 | } 107 | } 108 | } 109 | 110 | func downloadElQuijote(t *testing.T, urls []string, n int, delete bool) *MultiDownloader { 111 | // Gather remote sources info 112 | dldr := NewMultiDownloader(urls, n, time.Duration(5000) * time.Millisecond) 113 | _, err := dldr.GatherInfo() 114 | failOnError(t, err) 115 | 116 | _, err = dldr.SetupFile("") 117 | failOnError(t, err) 118 | 119 | err = dldr.Download(nil) 120 | failOnError(t, err) 121 | if delete { 122 | defer func() { 123 | err = os.Remove(dldr.filename) 124 | failOnError(t, err) 125 | }() 126 | } 127 | 128 | // Load everything into memory and compare. Not efficient, but OK for testing 129 | f1, err := ioutil.ReadFile("test/quijote.txt") 130 | failOnError(t, err) 131 | f2, err := ioutil.ReadFile(dldr.filename) 132 | failOnError(t, err) 133 | 134 | if !bytes.Equal(f1, f2) { 135 | t.Fail() 136 | } 137 | 138 | return dldr 139 | } 140 | 141 | // Test SHA256 check 142 | func TestCheckSHA256File (t *testing.T) { 143 | dldr := downloadElQuijote(t, []string{"https://raw.githubusercontent.com/alvatar/multipart-downloader/master/test/quijote.txt"}, 1, false) 144 | defer func() { 145 | err := os.Remove(dldr.filename) 146 | failOnError(t, err) 147 | }() 148 | err := dldr.CheckSHA256("1e9bb1b16f8810e44d6d5ede7005258518fa976719bc2ed254308e73c357cfcc") 149 | if err != nil { 150 | t.Error(err) 151 | } 152 | err = dldr.CheckSHA256("wrong-hash") 153 | if err == nil { 154 | t.Error(err) 155 | } 156 | } 157 | 158 | // Test MD5SUM check 159 | func TestCheckMD5SUMFile (t *testing.T) { 160 | dldr := downloadElQuijote(t, []string{"https://raw.githubusercontent.com/alvatar/multipart-downloader/master/test/quijote.txt"}, 1, false) 161 | defer func() { 162 | err := os.Remove(dldr.filename) 163 | failOnError(t, err) 164 | }() 165 | // Compare manually with a MD5SUM generated with the command-line tool 166 | // Github's ETag doesn't reflect the MD5SUM 167 | err := dldr.CheckMD5("45bb5fc96bb4c67778d288fba98eee48") 168 | if err != nil { 169 | t.Error(err) 170 | } 171 | err = dldr.CheckMD5("wrong-hash") 172 | if err == nil { 173 | t.Error(err) 174 | } 175 | } 176 | 177 | // Test download with 1 remote source 178 | func Test1SourceRemote (t *testing.T) { 179 | nConns := []int{1, 2, 5, 10} 180 | for _, n := range nConns { 181 | downloadElQuijote(t, []string{"https://raw.githubusercontent.com/alvatar/multipart-downloader/master/test/quijote.txt"}, n, true) 182 | } 183 | } 184 | 185 | // Test download with 2 remote sources 186 | func Test2SourcesRemote (t *testing.T) { 187 | nConns := []int{1, 2, 7, 19} 188 | for _, n := range nConns { 189 | downloadElQuijote(t, 190 | []string{ 191 | "https://raw.githubusercontent.com/alvatar/multipart-downloader/master/test/quijote2.txt", 192 | "https://raw.githubusercontent.com/alvatar/multipart-downloader/master/test/quijote.txt", 193 | }, 194 | n, 195 | true) 196 | } 197 | } 198 | 199 | // Test download with a connection drop from one of the sources 200 | // This test also triggers the server connection limit case 201 | func TestConnectionDropLocal (t *testing.T) { 202 | shutdown := make(chan bool) 203 | 204 | go func() { 205 | originalListener, err := net.Listen("tcp", ":8081") 206 | if err != nil { 207 | panic(err) 208 | } 209 | 210 | sl, err := stoppableListener.New(originalListener) 211 | if err != nil { 212 | panic(err) 213 | } 214 | 215 | http.Handle("/", http.FileServer(http.Dir("./test"))) 216 | server := http.Server{} 217 | 218 | go server.Serve(sl) 219 | 220 | // Stop this listener when the signal is received 221 | <- shutdown 222 | sl.Stop() 223 | }() 224 | 225 | go func() { 226 | originalListener, err := net.Listen("tcp", ":8082") 227 | if err != nil { 228 | panic(err) 229 | } 230 | 231 | sl, err := stoppableListener.New(originalListener) 232 | if err != nil { 233 | panic(err) 234 | } 235 | 236 | http.Handle("/quijote2", http.FileServer(http.Dir("./test"))) 237 | server := http.Server{} 238 | 239 | go server.Serve(sl) 240 | 241 | // Stop this listener when the signal is received 242 | <- shutdown 243 | sl.Stop() 244 | }() 245 | 246 | // Wait for 50 milliseconds for listeners to be ready 247 | timer := time.NewTimer(time.Millisecond * 50) 248 | <- timer.C 249 | 250 | go downloadElQuijote(t, []string{ 251 | "http://localhost:8081/quijote.txt", 252 | "http://localhost:8082/quijote2.txt", 253 | }, 2, true) 254 | 255 | // Wait to shutdown the listeners, hopefully in the middle of the transfer 256 | // TODO: Are transfers shut down off non-gracefully (as we wish) 257 | timer = time.NewTimer(time.Millisecond * 50) 258 | <- timer.C 259 | shutdown <- true 260 | shutdown <- true 261 | } 262 | -------------------------------------------------------------------------------- /downloader.go: -------------------------------------------------------------------------------- 1 | package multipartdownloader 2 | 3 | import ( 4 | "crypto/md5" 5 | "crypto/sha256" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "log" 10 | "net/http" 11 | "net/url" 12 | "os" 13 | "path" 14 | "strconv" 15 | "time" 16 | ) 17 | 18 | const ( 19 | tmpFileSuffix = ".part" 20 | fileWriteChunk = 1 << 12 21 | fileReadChunk = 1 << 12 22 | ) 23 | 24 | // Info gathered from different sources 25 | type urlInfo struct { 26 | url string 27 | fileLength int64 28 | etag string 29 | connSuccess bool 30 | statusCode int 31 | } 32 | 33 | // Chunk boundaries 34 | type Chunk struct { 35 | Begin int64 36 | End int64 37 | } 38 | 39 | // Progress feedback type 40 | type ConnectionProgress struct { 41 | Id int 42 | Begin int64 43 | End int64 44 | Current int64 45 | } 46 | 47 | // The file downloader 48 | type MultiDownloader struct { 49 | urls []string // List of all sources for the file 50 | nConns int // Number of max concurrent connections to use 51 | timeout time.Duration // Timeout for all connections 52 | fileLength int64 // Size of the file. It could be larger than 4GB. 53 | filename string // Output filename 54 | partFilename string // Incomplete output filename 55 | ETag string // ETag (if available) of the file 56 | chunks []Chunk // A table of the chunks the file is divided into 57 | } 58 | 59 | func NewMultiDownloader(urls []string, nConns int, timeout time.Duration) *MultiDownloader { 60 | return &MultiDownloader{urls: urls, nConns: nConns, timeout: timeout} 61 | } 62 | 63 | // Get the info of the file, using the HTTP HEAD request 64 | func (dldr *MultiDownloader) GatherInfo() (chunks []Chunk, err error) { 65 | if len(dldr.urls) == 0 { 66 | return nil, errors.New("No URLs provided") 67 | } 68 | 69 | results := make(chan urlInfo) 70 | defer close(results) 71 | 72 | // Connect to all sources concurrently 73 | getHead := func (url string) { 74 | client := http.Client{ 75 | Timeout: time.Duration(dldr.timeout), 76 | } 77 | resp, err := client.Head(url) 78 | if err != nil { 79 | results <- urlInfo{url: url, connSuccess: false, statusCode: 0} 80 | return 81 | } 82 | defer resp.Body.Close() 83 | flen, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 0, 64) 84 | etag := resp.Header.Get("Etag") 85 | if err != nil { 86 | log.Println("Error reading Content-Length from HTTP header") 87 | flen = 0 88 | } 89 | results <- urlInfo{ 90 | url: url, 91 | fileLength: flen, 92 | etag: etag, 93 | connSuccess: true, 94 | statusCode: resp.StatusCode, 95 | } 96 | } 97 | for _, url := range dldr.urls { 98 | go getHead(url) 99 | } 100 | 101 | // Gather the results and return if something is wrong 102 | resArray := make([]urlInfo, len(dldr.urls)) 103 | for i := 0; i < len(dldr.urls); i++ { 104 | r := <-results 105 | resArray[i] = r 106 | if !r.connSuccess || r.statusCode != 200 { 107 | return nil, errors.New(fmt.Sprintf("Failed connection to URL %s", resArray[i].url)) 108 | } 109 | } 110 | 111 | // Check that all sources agree on file length and Etag 112 | // Empty Etags are also accepted 113 | commonFileLength := resArray[0].fileLength 114 | commonEtag := resArray[0].etag 115 | for _, r := range resArray[1:] { 116 | if r.fileLength != commonFileLength || (len(r.etag) != 0 && r.etag != commonEtag) { 117 | return nil, errors.New("URLs must point to the same file") 118 | } 119 | } 120 | dldr.fileLength = commonFileLength 121 | if commonEtag != "" { 122 | dldr.ETag = commonEtag[1:len(commonEtag)-1] // Remove the surrounding "" 123 | } 124 | dldr.filename = urlToFilename(resArray[0].url) 125 | dldr.partFilename = dldr.filename + tmpFileSuffix 126 | 127 | logVerbose("File length: ", dldr.fileLength, " bytes") 128 | logVerbose("File name: ", dldr.filename) 129 | logVerbose("Parts file name: ", dldr.partFilename) 130 | logVerbose("Etag: ", dldr.ETag) 131 | 132 | // Build the chunks table, necessary for constructing requests 133 | dldr.buildChunks() 134 | 135 | return dldr.chunks, nil 136 | } 137 | 138 | // Prepare the file used for writing the blocks of data 139 | func (dldr *MultiDownloader) SetupFile(filename string) (os.FileInfo, error) { 140 | if filename != "" { 141 | dldr.filename = filename 142 | dldr.partFilename = filename + tmpFileSuffix 143 | } 144 | 145 | file, err := os.Create(dldr.partFilename) 146 | if err != nil { 147 | return nil, err 148 | } 149 | 150 | // Force file size in order to write arbitrary chunks 151 | err = file.Truncate(dldr.fileLength) 152 | fileInfo, err := file.Stat() 153 | return fileInfo, err 154 | } 155 | 156 | // Internal: build the chunks table, deciding boundaries 157 | func (dldr *MultiDownloader) buildChunks() { 158 | // The algorithm takes care of possible rounding errors splitting into chunks 159 | // by taking out the remainder and distributing it among the first chunks 160 | n := int64(dldr.nConns) 161 | remainder := dldr.fileLength % n 162 | exactNumerator := dldr.fileLength - remainder 163 | chunkSize := exactNumerator / n 164 | dldr.chunks = make([]Chunk, n) 165 | boundary := int64(0) 166 | nextBoundary := chunkSize 167 | for i := int64(0); i < n; i++ { 168 | if remainder > 0 { 169 | remainder-- 170 | nextBoundary++ 171 | } 172 | dldr.chunks[i] = Chunk{boundary, nextBoundary} 173 | boundary = nextBoundary 174 | nextBoundary = nextBoundary + chunkSize 175 | } 176 | } 177 | 178 | // Perform the multipart download 179 | // 180 | // This algorithm handles download splitting the file into n blocks. If a connection fails, it 181 | // will try with other sources (as different sources may have different connection limits) then, 182 | // if it still fails, it will wait until other process is done. Thus, nConns really means the 183 | // MAXIMUM allowed connections, which will be tried at first and then adjusted. 184 | // The alternative approach of dividing into nSize blocks and spawn threads requests from a pool 185 | // of tasks has been discarded to avoid the overhead of performing potentially too many HTTP 186 | // requests, as a result of each thread performing many requests instead of the minimum necessary. 187 | // 188 | // The designed algorithm tries to minimize the amount of successful HTTP requests. 189 | // 190 | // As a result of the approach taken, the number of concurrent connections can drop if no source 191 | // is available to accomodate the request. In any case, setting a reasonable limit is left to the 192 | // Take into consideration that some servers may ban your IP for some amount of time if you flood 193 | // them with too many requests. 194 | func (dldr *MultiDownloader) Download( feedbackFunc func ([]ConnectionProgress) ) (err error) { 195 | done := make(chan bool) 196 | failed := make(chan bool) 197 | available := make(chan bool, dldr.nConns) 198 | 199 | progress := make(chan ConnectionProgress) 200 | 201 | // Parallel download, wait for all to return 202 | downloadChunk := func(f *os.File, i int) { 203 | numUrls := len(dldr.urls) 204 | for { 205 | // Block until there are connections available (all goroutines at first) 206 | <- available 207 | 208 | for try := 0; try < numUrls; try++ { // Try each URL before signaling failure 209 | client := &http.Client{} 210 | // Select URL in a Round-Robin fashion, each try is done with the next i 211 | selectedUrl := dldr.urls[(i+try) % numUrls] 212 | 213 | // Send per-range requests 214 | req, err := http.NewRequest("GET", selectedUrl, nil) 215 | if err != nil { 216 | continue; 217 | } 218 | req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", dldr.chunks[i].Begin, dldr.chunks[i].End)) 219 | resp, err := client.Do(req) 220 | if err != nil { 221 | continue; 222 | } 223 | defer resp.Body.Close() 224 | 225 | // Read response and process it in chunks 226 | buf := make([]byte, fileWriteChunk) 227 | cursor := dldr.chunks[i].Begin 228 | for { 229 | n, err := io.ReadFull(resp.Body, buf) 230 | if err == io.EOF { 231 | done <- true // Signal success 232 | return 233 | } 234 | // According to doc: "Clients of WriteAt can execute parallel WriteAt calls on the 235 | // same destination if the ranges do not overlap." 236 | _, errWr := f.WriteAt(buf[:n], cursor) 237 | if errWr != nil { 238 | log.Fatal(errWr) 239 | break 240 | } 241 | cursor += int64(n) 242 | 243 | // Send progress if feedback function is provided 244 | if feedbackFunc != nil { 245 | progress <- ConnectionProgress{ 246 | Id: i, 247 | Begin: dldr.chunks[i].Begin, 248 | End: dldr.chunks[i].End, 249 | Current: cursor, 250 | } 251 | } 252 | } 253 | } 254 | 255 | failed <- true // Signal failure 256 | } 257 | } 258 | 259 | file, err := os.OpenFile(dldr.partFilename, os.O_WRONLY, 0666) 260 | if err != nil { 261 | return 262 | } 263 | 264 | for i := 0; i < dldr.nConns; i++ { 265 | go downloadChunk(file, i) 266 | 267 | // We start making all requested connections available 268 | available <- true 269 | } 270 | 271 | // Handle progress feedback 272 | if feedbackFunc != nil { 273 | progressArray := make([]ConnectionProgress, dldr.nConns) 274 | for i := 0; i < dldr.nConns; i++ { 275 | progressArray[i] = ConnectionProgress{ 276 | Id: i, 277 | Begin: dldr.chunks[i].Begin, 278 | End: dldr.chunks[i].End, 279 | Current: dldr.chunks[i].Begin, 280 | } 281 | } 282 | go func() { 283 | complete := 0 284 | for complete < dldr.nConns { 285 | p := <-progress 286 | progressArray[p.Id] = p 287 | feedbackFunc(progressArray) 288 | if p.Current >= p.End { 289 | complete++ 290 | } 291 | } 292 | }() 293 | } 294 | 295 | remainingChunks := dldr.nConns 296 | failedCount := 0 297 | for remainingChunks > 0 { 298 | // Block until a goroutine either succeeded or failed 299 | select { 300 | case <- done: 301 | remainingChunks-- 302 | available <- true // Does not block up to nConns items 303 | case <- failed: 304 | failedCount++ 305 | if failedCount >= dldr.nConns { 306 | return errors.New("The file couldn't be downloaded from any source. Aborting.") 307 | } 308 | } 309 | } 310 | 311 | err = os.Rename(dldr.partFilename, dldr.filename) 312 | return 313 | } 314 | 315 | // Check SHA-256 of downloaded file 316 | func (dldr *MultiDownloader) CheckSHA256(sha256hash string) (err error) { 317 | // Open the file and get the size 318 | file, err := os.Open(dldr.filename) 319 | if err != nil { 320 | return err 321 | } 322 | defer func() { 323 | if err := file.Close(); err != nil { 324 | panic(err) 325 | } 326 | }() 327 | 328 | // Compute the SHA256 329 | buf := make([] byte, fileReadChunk) 330 | hash := sha256.New() 331 | for { 332 | n, err := file.Read(buf) 333 | if err != nil && err != io.EOF { 334 | panic(err) 335 | } 336 | if n == 0 { 337 | break 338 | } 339 | 340 | if _, err := hash.Write(buf[:n]); err != nil { 341 | panic(err) 342 | } 343 | } 344 | computedSHA256bytes := hash.Sum(nil) 345 | 346 | // Compare the SHA256 347 | computedSHA256 := fmt.Sprintf("%x", computedSHA256bytes) 348 | 349 | if computedSHA256 != sha256hash { 350 | return errors.New(fmt.Sprintf("Computed SHA256 does not match: provided=%s computed=%s", sha256hash, computedSHA256)) 351 | } 352 | return nil 353 | } 354 | 355 | // Check MD5SUM of downloaded file 356 | func (dldr *MultiDownloader) CheckMD5(md5sum string) (err error) { 357 | // Open the file and get the size 358 | file, err := os.Open(dldr.filename) 359 | if err != nil { 360 | return err 361 | } 362 | defer func() { 363 | if err := file.Close(); err != nil { 364 | panic(err) 365 | } 366 | }() 367 | 368 | // Compute the MD5SUM 369 | buf := make([] byte, fileReadChunk) 370 | hash := md5.New() 371 | for { 372 | n, err := file.Read(buf) 373 | if err != nil && err != io.EOF { 374 | panic(err) 375 | } 376 | if n == 0 { 377 | break 378 | } 379 | 380 | if _, err := hash.Write(buf[:n]); err != nil { 381 | panic(err) 382 | } 383 | } 384 | computedMD5SUMbytes := hash.Sum(nil) 385 | 386 | // Compare the MD5SUM 387 | computedMD5SUM := fmt.Sprintf("%x", computedMD5SUMbytes) 388 | 389 | if computedMD5SUM != md5sum { 390 | return errors.New(fmt.Sprintf("Computed MD5SUM does not match: provided=%s computed=%s", md5sum, computedMD5SUM)) 391 | } 392 | return nil 393 | } 394 | 395 | 396 | //////////////////////////////////////////////////////////////////////////////// 397 | // Auxiliary functions 398 | 399 | // Get the name of the file from the URL 400 | func urlToFilename(urlStr string) string { 401 | url, err := url.Parse(urlStr) 402 | if err != nil { 403 | return "downloaded-file" 404 | } 405 | _, f := path.Split(url.Path) 406 | return f 407 | } 408 | --------------------------------------------------------------------------------