├── README.md ├── misc ├── bar1.gif ├── bar2.gif ├── img1.png ├── img2.png ├── img3.png ├── img4.png └── img5.png ├── pcurl.go └── pcurl_test.go /README.md: -------------------------------------------------------------------------------- 1 | # go-pcurl 2 | cURL in a parallel way, written in Go 3 | 4 | ## Progress bar 5 | 6 | *Downloading a resource from a locally running nginx* 7 | 8 | ![Progress](https://github.com/thimoonxy/go-pcurl/blob/master/misc/bar1.gif) 9 | 10 | *Downloading a resource from a website* 11 | 12 | ![Progress](https://github.com/thimoonxy/go-pcurl/blob/master/misc/bar2.gif) 13 | 14 | ## Keep-Alive 15 | 16 | ![Progress](https://github.com/thimoonxy/go-pcurl/blob/master/misc/img5.png) 17 | 18 | ## Test sample, compared with the traditional wget way 19 | 20 | ### Plain wget took 6m21s+, using only 1 connection 21 | 22 | ``` 23 | 24 | (Laptop)simon@Simon-MBp:~/src$time wget https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-17.09.0.ce-1.el7.centos.x86_64.rpm -SO /tmp/docker.rpm 25 | --2017-09-30 15:21:03-- https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-17.09.0.ce-1.el7.centos.x86_64.rpm 26 | Resolving download.docker.com... 54.239.132.250, 54.239.132.174, 54.239.132.84, ... 27 | Connecting to download.docker.com|54.239.132.250|:443... connected. 28 | HTTP request sent, awaiting response... 
29 | HTTP/1.1 200 OK 30 | Content-Type: application/x-redhat-package-manager 31 | Content-Length: 22157896 32 | Connection: keep-alive 33 | Date: Wed, 27 Sep 2017 01:55:11 GMT 34 | Last-Modified: Wed, 27 Sep 2017 01:47:41 GMT 35 | x-amz-version-id: Y76xcrpq2VKOnT7JLWgG5L65_DXLCww4 36 | ETag: "5e7d5e5afcc6cda75771533dc58b2749-3" 37 | Server: AmazonS3 38 | X-Cache: RefreshHit from cloudfront 39 | Via: 1.1 ae162f6796e551002447afd7c07ec67a.cloudfront.net (CloudFront) 40 | X-Amz-Cf-Id: kQiBxdtLGBmSlogdglwGJHFYT_G9IjHjR_SMZQq64isGqGp4A3MhWg== 41 | Length: 22157896 (21M) [application/x-redhat-package-manager] 42 | Saving to: ‘/tmp/docker.rpm’ 43 | 44 | /tmp/docker.rpm 100%[=============================================================================================>] 21.13M 100KB/s in 6m 20s 45 | 46 | 2017-09-30 15:27:25 (56.9 KB/s) - ‘/tmp/docker.rpm’ saved [22157896/22157896] 47 | 48 | 49 | real 6m21.471s 50 | user 0m0.168s 51 | sys 0m0.560s 52 | (Laptop)simon@Simon-MBp:~/src$md5 /tmp/docker.rpm 53 | MD5 (/tmp/docker.rpm) = 647b4bb14e61bec73ddd137f6a40edac 54 | 55 | ``` 56 | 57 | ### pcurl took only 47s, used 20 connections 58 | 59 | ``` 60 | (Laptop)simon@Simon-MBp:~/src$time ./pcurl https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-17.09.0.ce-1.el7.centos.x86_64.rpm /tmp/docker.rpm 61 | 2017/09/30 15:34:02 Created tmpdir: /tmp/gotemp430902692 62 | 2017/09/30 15:34:21 Created tmpfile: /tmp/gotemp430902692/13.223914333 63 | 2017/09/30 15:34:21 Created tmpfile: /tmp/gotemp430902692/15.058328607 64 | 2017/09/30 15:34:23 Created tmpfile: /tmp/gotemp430902692/0.228534679 65 | 2017/09/30 15:34:23 Created tmpfile: /tmp/gotemp430902692/12.122017740 66 | 2017/09/30 15:34:23 Created tmpfile: /tmp/gotemp430902692/7.808832630 67 | 2017/09/30 15:34:24 Created tmpfile: /tmp/gotemp430902692/6.203386216 68 | 2017/09/30 15:34:26 Created tmpfile: /tmp/gotemp430902692/18.738236867 69 | 2017/09/30 15:34:28 Created tmpfile: /tmp/gotemp430902692/1.387619003 70 | 
2017/09/30 15:34:28 Created tmpfile: /tmp/gotemp430902692/5.490759238 71 | 2017/09/30 15:34:29 Created tmpfile: /tmp/gotemp430902692/8.978373601 72 | 2017/09/30 15:34:29 Created tmpfile: /tmp/gotemp430902692/17.806482164 73 | 2017/09/30 15:34:30 Created tmpfile: /tmp/gotemp430902692/2.045384128 74 | 2017/09/30 15:34:30 Created tmpfile: /tmp/gotemp430902692/16.242278130 75 | 2017/09/30 15:34:34 Created tmpfile: /tmp/gotemp430902692/4.063984553 76 | 2017/09/30 15:34:37 Created tmpfile: /tmp/gotemp430902692/9.871300318 77 | 2017/09/30 15:34:37 Created tmpfile: /tmp/gotemp430902692/11.116061194 78 | 2017/09/30 15:34:43 Created tmpfile: /tmp/gotemp430902692/3.103707373 79 | 2017/09/30 15:34:43 Created tmpfile: /tmp/gotemp430902692/19.037809331 80 | 2017/09/30 15:34:45 Created tmpfile: /tmp/gotemp430902692/10.886562072 81 | 2017/09/30 15:34:46 Created tmpfile: /tmp/gotemp430902692/14.389013669 82 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/0.228534679 83 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/1.387619003 84 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/2.045384128 85 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/3.103707373 86 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/4.063984553 87 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/5.490759238 88 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/6.203386216 89 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/7.808832630 90 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/8.978373601 91 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/9.871300318 92 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/10.886562072 93 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/11.116061194 94 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/12.122017740 95 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/13.223914333 96 | 2017/09/30 15:34:46 Cleaned tmpfile: 
/tmp/gotemp430902692/14.389013669 97 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/15.058328607 98 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/16.242278130 99 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/17.806482164 100 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/18.738236867 101 | 2017/09/30 15:34:46 Cleaned tmpfile: /tmp/gotemp430902692/19.037809331 102 | 2017/09/30 15:34:46 Downloaded: from https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-17.09.0.ce-1.el7.centos.x86_64.rpm to /tmp/docker.rpm 103 | 2017/09/30 15:34:46 Removed tmpdir: /tmp/gotemp430902692 104 | 105 | real 0m47.058s 106 | user 0m0.497s 107 | sys 0m0.902s 108 | 109 | (Laptop)simon@Simon-MBp:~/src$md5 /tmp/docker.rpm 110 | MD5 (/tmp/docker.rpm) = 647b4bb14e61bec73ddd137f6a40edac 111 | ``` 112 | 113 | ### iftop outputs when running wget (in 1 connection) 114 | 115 | ![Progress](https://github.com/thimoonxy/go-pcurl/blob/master/misc/img2.png) 116 | 117 | ### iftop outputs when running pcurl (in 4 connections) 118 | 119 | ![Progress](https://github.com/thimoonxy/go-pcurl/blob/master/misc/img1.png) 120 | 121 | ### iftop outputs when running pcurl (in more connections) 122 | 123 | ![Progress](https://github.com/thimoonxy/go-pcurl/blob/master/misc/img3.png) 124 | 125 | ![Progress](https://github.com/thimoonxy/go-pcurl/blob/master/misc/img4.png) 126 | 127 | 128 | ### TODO 129 | 130 | - [ ] Need more parameter flags instead of $@ ; 131 | - [ ] Bandwidth control for each connection; 132 | - [x] More human-readable output instead of ugly log.Print lines; 133 | - [x] Progress bar; 134 | - [ ] Detect and select the fastest IP to use, if the resource resolves to several IP records; 135 | - [x] Local optimization, decreasing the cost of local memory/disk I/O and so forth; 136 | - [x] Keep-Alive; -------------------------------------------------------------------------------- /misc/bar1.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/thimoonxy/go-pcurl/88410faaf1234eda6ea3fd6636fc24de531dd776/misc/bar1.gif -------------------------------------------------------------------------------- /misc/bar2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thimoonxy/go-pcurl/88410faaf1234eda6ea3fd6636fc24de531dd776/misc/bar2.gif -------------------------------------------------------------------------------- /misc/img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thimoonxy/go-pcurl/88410faaf1234eda6ea3fd6636fc24de531dd776/misc/img1.png -------------------------------------------------------------------------------- /misc/img2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thimoonxy/go-pcurl/88410faaf1234eda6ea3fd6636fc24de531dd776/misc/img2.png -------------------------------------------------------------------------------- /misc/img3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thimoonxy/go-pcurl/88410faaf1234eda6ea3fd6636fc24de531dd776/misc/img3.png -------------------------------------------------------------------------------- /misc/img4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thimoonxy/go-pcurl/88410faaf1234eda6ea3fd6636fc24de531dd776/misc/img4.png -------------------------------------------------------------------------------- /misc/img5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thimoonxy/go-pcurl/88410faaf1234eda6ea3fd6636fc24de531dd776/misc/img5.png -------------------------------------------------------------------------------- /pcurl.go: 
// pcurl downloads a single URL over several HTTP range requests that run in
// parallel, stores every range in its own temporary file and finally
// concatenates those files, in order, into the destination file.

// Clientvar is the HTTP client shared by all requests. main builds it with a
// keep-alive connection pool so that chunk requests can reuse connections.
var Clientvar *http.Client

// ckerr terminates the program through log.Fatal when err is non-nil.
// Deferred functions do not run on that path.
func ckerr(err error) {
	if err != nil {
		log.Fatal(err.Error())
	}
}

// getres issues a GET for url through client and returns the response; the
// caller owns res.Body and must close it.
//
// A Range header "bytes=rgstart-rgend" (both offsets inclusive) is added when
// rgstart >= 0 and rgend >= rgstart; passing -1, -1 requests the whole
// resource. Transport errors and HTTP statuses >= 400 abort the program.
func getres(client *http.Client, url string, rgstart, rgend int64) (res *http.Response) {
	req, err := http.NewRequest("GET", url, nil)
	ckerr(err)

	// Headers
	req.Proto = "HTTP/1.1"
	req.Header.Add("Accept", "*/*")
	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36")
	req.Header.Del("Accept-Encoding")

	// Range: rgend may legitimately be 0 (the single byte 0-0), so the test is
	// "end not before start" rather than "end is positive".
	if rgstart >= 0 && rgend >= rgstart {
		req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", rgstart, rgend))
	}

	res, err = client.Do(req)
	ckerr(err)
	if res.StatusCode >= http.StatusBadRequest {
		// Never store an error page as if it were file content.
		res.Body.Close()
		log.Fatalf("GET %s: unexpected status %s", url, res.Status)
	}
	return res
}

// createTMPdir creates a fresh temporary directory below base whose name
// starts with prefix and returns its path. Failure aborts the program.
func createTMPdir(base, prefix string) string {
	name, err := ioutil.TempDir(base, prefix)
	ckerr(err)
	return name
}

// destroyTMPdir removes dir and everything below it. As a safety net it
// refuses (fatally) to touch a path that does not contain prefix.
func destroyTMPdir(dir, prefix string) {
	if !strings.Contains(dir, prefix) {
		log.Fatal(dir + " Dir is not a temp dir.")
	}
	stat, err := os.Stat(dir)
	ckerr(err)
	if stat.IsDir() {
		ckerr(os.RemoveAll(dir))
		log.Printf("Removed tmpdir: %s", dir)
	}
}

// subtask describes one byte range of the download and the temporary file
// that receives it.
type subtask struct {
	seq        int64  // zero-based position of this chunk
	rgstart    int64  // first byte offset of the range (inclusive)
	rgend      int64  // last byte offset of the range (inclusive)
	length     int64  // number of bytes in the range; 0 for an empty chunk
	islast     bool   // true for the chunk with the highest seq
	url        string // resource to download
	tmpfname   string // path of the temporary file for this chunk
	tmpcreated bool   // set once the chunk was written to tmpfname
}

/* Byte Ranges
Reference to https://tools.ietf.org/html/rfc7233#section-2.1
   The first-byte-pos value in a byte-range-spec gives the byte-offset
   of the first byte in a range.  The last-byte-pos value gives the
   byte-offset of the last byte in the range; that is, the byte
   positions specified are inclusive.  Byte offsets start at zero.

   Examples of byte-ranges-specifier values:

   o  The first 500 bytes (byte offsets 0-499, inclusive):
        bytes=0-499
   o  The second 500 bytes (byte offsets 500-999, inclusive):
        bytes=500-999 */

// init computes the byte range of chunk task.seq out of count chunks of a
// resource that is originlength bytes long, and creates the chunk's (empty)
// temporary file inside tmpdirname. task.seq must be set beforehand.
//
// Every chunk is originlength/count+1 bytes long; the last chunk takes what
// is left. Ranges are clamped to the resource, so for very small resources
// trailing chunks end up with length 0 instead of pointing past the end.
func (task *subtask) init(originlength, count int64, url, tmpdirname string) {
	task.url = url
	task.tmpcreated = false
	task.islast = task.seq == count-1

	chunk := originlength/count + 1
	task.rgstart = task.seq * chunk
	if task.rgstart > originlength {
		task.rgstart = originlength
	}
	task.rgend = task.rgstart + chunk - 1
	if task.islast || task.rgend > originlength-1 {
		// Offsets are inclusive: the final valid offset is originlength-1.
		task.rgend = originlength - 1
	}
	task.length = task.rgend - task.rgstart + 1

	tmp, err := ioutil.TempFile(tmpdirname, strconv.FormatInt(task.seq, 10)+".")
	ckerr(err)
	task.tmpfname = tmp.Name()
	// Only the name is needed here; keeping the handle open would leak one
	// file descriptor per chunk.
	ckerr(tmp.Close())
}

// get downloads the chunk's byte range through client, streams it into
// task.tmpfname and sets task.tmpcreated. An empty chunk (length 0) issues no
// request. Any failure, a reply other than 206 Partial Content, or a short
// body aborts the program.
func (task *subtask) get(client *http.Client) {
	out, err := os.OpenFile(task.tmpfname, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	ckerr(err)

	if task.length > 0 {
		res := getres(client, task.url, task.rgstart, task.rgend)
		if res.StatusCode != http.StatusPartialContent {
			// A 200 here would carry the whole resource, not this chunk.
			log.Fatalf("Range bytes=%d-%d of %s answered with %s", task.rgstart, task.rgend, task.url, res.Status)
		}
		n, err := bufio.NewReader(res.Body).WriteTo(out)
		ckerr(err)
		ckerr(res.Body.Close())
		if n != task.length {
			log.Fatalf("Range bytes=%d-%d of %s delivered %d bytes, expected %d", task.rgstart, task.rgend, task.url, n, task.length)
		}
	}

	ckerr(out.Close())
	task.tmpcreated = true
}

// appendChunk verifies that the temporary file of task holds exactly
// task.length bytes and appends it to dst. It reports false when the file is
// unexpectedly empty; any other size mismatch or I/O error aborts the program.
func appendChunk(dst *os.File, task *subtask) bool {
	tmp, err := os.Open(task.tmpfname)
	ckerr(err)
	defer tmp.Close()

	// Check the size before copying so a bad chunk never reaches dst.
	s, err := tmp.Stat()
	ckerr(err)
	if s.Size() != task.length {
		if s.Size() == 0 {
			return false
		}
		log.Fatalf("TmpFile %s size %d != range length %d", task.tmpfname, s.Size(), task.length)
	}

	n, err := bufio.NewReader(tmp).WriteTo(dst)
	ckerr(err)
	if n != task.length {
		log.Fatalf("TmpFile %s size %d != range length %d", task.tmpfname, n, task.length)
	}
	return true
}

// reassemble appends the temporary files of tasks, in slice order, to dst
// (created if missing) and deletes each temporary file once it was appended.
// It returns true only when every chunk was present and appended; a chunk
// that was never downloaded or whose file is unexpectedly empty yields false.
func reassemble(tasks []subtask, dst string) (done bool) {
	f, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	ckerr(err)
	defer f.Close()

	for i := range tasks {
		task := &tasks[i]
		if !task.tmpcreated {
			// Skipping it would produce a file with a hole and still claim success.
			log.Printf("Chunk %d was not downloaded", task.seq)
			return false
		}
		if !appendChunk(f, task) {
			log.Printf("TmpFile %s is empty", task.tmpfname)
			return false
		}
		ckerr(os.Remove(task.tmpfname))
		log.Printf("Cleaned tmpfile: %s", task.tmpfname)
	}
	return true
}

// b2s renders data as a whole number with a binary unit ("21 MB"). n is the
// index of the unit data is currently expressed in (0 = bytes); the value is
// divided by 1024 until it drops below 1024 or the largest unit (PB) is
// reached.
func b2s(data int64, n int) string {
	var byteUnits = []string{"B", "KB", "MB", "GB", "TB", "PB"}
	if n < 0 {
		n = 0
	}
	if n > len(byteUnits)-1 {
		n = len(byteUnits) - 1
	}
	// Stop at the last unit instead of indexing past the table.
	for data >= 1<<10 && n < len(byteUnits)-1 {
		data /= 1024
		n++
	}
	return fmt.Sprintf("%d %s", data, byteUnits[n])
}

// precount decides how many chunks (tmpfcount) a resource of originlength
// bytes is split into and how many of them are fetched at a time (onetime,
// always 4).
//
// Resources smaller than bufsize use 4 chunks. Larger ones use
// originlength/bufsize chunks, raised to at least 4; a result of 400 or more
// is replaced by 200.
// NOTE(review): results between 201 and 399 are passed through unchanged
// while 400 and above collapse to 200 -- confirm whether a plain cap was meant.
func precount(originlength, bufsize int64) (onetime, tmpfcount int) {
	const parallel = 4
	if bufsize <= 0 || originlength < bufsize {
		return parallel, parallel
	}

	tmpfcount = int(originlength / bufsize)
	switch {
	case tmpfcount >= 400: // in case too many openfiles
		tmpfcount = 200
	case tmpfcount <= 4:
		tmpfcount = 4
	}
	log.Printf("TmpFile amount: %d", tmpfcount)
	return parallel, tmpfcount
}

// downloadDirect streams the body of res into dst (created if missing,
// appended to otherwise). When the server announced a Content-Length, the
// number of bytes written must match it or the program aborts.
func downloadDirect(res *http.Response, dst string) {
	f, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	ckerr(err)
	n, err := bufio.NewReader(res.Body).WriteTo(f)
	ckerr(err)
	ckerr(f.Close())
	// ContentLength is -1 when the length is unknown; nothing to compare then.
	if res.ContentLength >= 0 && res.ContentLength != n {
		log.Fatalf("ContentLength %d != file size %d", res.ContentLength, n)
	}
	log.Println("Finshied:", dst)
}

// acceptRange returns silently when the response advertises range support in
// its Accept-Ranges header. When the header is absent or says "none", the
// body of res is saved to dst in one piece and the process exits with
// status 0, so the function does not return in that case.
func acceptRange(res *http.Response, dst string) {
	accept := strings.TrimSpace(res.Header.Get("Accept-Ranges"))
	if accept != "" && !strings.EqualFold(accept, "none") {
		return
	}
	log.Println("Range not accepted, downloading directly ...")
	downloadDirect(res, dst)
	os.Exit(0)
}

// main implements "pcurl $src $dst": it probes the URL, splits the resource
// into chunks, downloads them with a bounded number of concurrent range
// requests and reassembles them into $dst.
func main() {
	// Settings
	tmpbase := "/tmp"
	if st, err := os.Stat(tmpbase); err != nil || !st.IsDir() {
		tmpbase = os.TempDir()
	}
	tmpprefix := "gotemp"
	bufsize := int64(10 << 20) // 10MB
	MaxIdleConnections := 20
	RequestTimeout := 0 // no timeout

	// Preparing args
	if len(os.Args) != 3 {
		fmt.Println("Usage: $0 $src $dst")
		os.Exit(1)
	}
	url := os.Args[1] // source url
	dst := os.Args[2] // destination fpath, where file stores
	if _, err := os.Stat(dst); err == nil {
		fmt.Printf("%s found, Override?: [y/n]", dst)
		var decide string
		fmt.Scan(&decide) // on a read error decide stays empty and is rejected below
		decide = strings.ToLower(strings.TrimSpace(decide))
		switch decide {
		case "y":
			ckerr(os.Remove(dst))
		case "n":
			log.Fatalf("Aborted: %s already exists", dst)
		default:
			log.Fatalf("Expected [y/n], but got %v", decide)
		}
	}

	// Shared client
	Clientvar = &http.Client{
		Transport: &http.Transport{
			MaxIdleConnsPerHost: MaxIdleConnections,
			DisableCompression:  true, // client will compress it by default
		},
		Timeout: time.Duration(RequestTimeout) * time.Second,
	}

	// Probe the resource; -1 indicates no range specified
	res := getres(Clientvar, url, -1, -1)
	acceptRange(res, dst)
	originlength := res.ContentLength
	if originlength <= 0 {
		// Without a known, positive length there is nothing to split.
		log.Println("Content length unknown or zero, downloading directly ...")
		downloadDirect(res, dst)
		res.Body.Close()
		return
	}
	res.Body.Close()

	onetime, count := precount(originlength, bufsize)
	tasks := make([]subtask, count)
	slots := make(chan bool, onetime) // ch with buff, to control batch scale
	lock := make(chan bool)           // main-routine waits till goroutines finish

	// Creating tmp dir
	tmpdirname := createTMPdir(tmpbase, tmpprefix)
	log.Printf("Originlength: %v", b2s(originlength, 0))
	log.Printf("Created tmpdir: %s", tmpdirname)
	defer destroyTMPdir(tmpdirname, tmpprefix) // cleanning up after process

	// Multi-processing. GOMAXPROCS already defaults to the CPU count since
	// Go 1.5; the call only matters for older toolchains.
	runtime.GOMAXPROCS(runtime.NumCPU())
	for i := range tasks {
		tasks[i].seq = int64(i)
		tasks[i].init(originlength, int64(count), url, tmpdirname)
		go func(task *subtask) {
			slots <- true // blocks while onetime downloads are in flight
			task.get(Clientvar)
			<-slots
			lock <- true
		}(&tasks[i])
	}

	// Sticking goroutines onto main
	for range tasks {
		<-lock
	}

	// Outputting
	if reassemble(tasks, dst) {
		log.Printf("Downloaded: from %s to %s", url, dst)
	}
}

// ---- pcurl_test.go ----

// Test_Getres fetches the same URL in two batches of two concurrent requests,
// one second apart, through one shared keep-alive client. It needs network
// access to mirrors.163.com.
func Test_Getres(t *testing.T) {
	//vars
	lock := make(chan bool)
	MaxIdleConnections := 20
	RequestTimeout := 5
	Clientvar = &http.Client{
		Transport: &http.Transport{
			MaxIdleConnsPerHost: MaxIdleConnections,
			DisableCompression:  true, // client will compress it by default
		},
		Timeout: time.Duration(RequestTimeout) * time.Second,
	}
	call := func(i int) {
		res := getres(Clientvar, "http://mirrors.163.com/centos/7/isos/x86_64/", -1, -1)
		// Drain the body so the connection can go back to the idle pool.
		_, err := io.Copy(ioutil.Discard, res.Body)
		if err != nil {
			t.Error(err)
		} else {
			t.Log("#", i, " done @", time.Now().Format("15:04:05.99"))
		}
		res.Body.Close()
	}
	// batch runs call(from) .. call(to-1) concurrently and waits for all of them.
	batch := func(from, to int) {
		for i := from; i < to; i++ {
			go func(i int) {
				call(i)
				lock <- true
			}(i)
		}
		for i := from; i < to; i++ {
			<-lock
		}
	}

	// 1st batch
	batch(0, 2)

	time.Sleep(time.Second)

	// 2nd batch
	batch(2, 4)
}