├── .gitignore ├── LICENSE ├── README.md ├── build_rpm ├── deploy.properties ├── example.hcl ├── go.mod ├── go.sum ├── main.go ├── misc.go ├── remirror.service └── remirror.spec /.gitignore: -------------------------------------------------------------------------------- 1 | /remirror 2 | /remirror.hcl 3 | *.rpm 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Joeliepoly 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # remirror 2 | Caching proxy for various public things (arch linux, fedora, centos, and other misc. 
things) 3 | 4 | To build, you need to have a working Go installation on your computer. (See https://golang.org/doc/install) 5 | 6 | Just check out the repository and then run: 7 | 8 | go build . 9 | ./remirror 10 | 11 | It defaults to cache its files in /var/remirror and uses a hardcoded upstream mirror at Xmission at the moment. 12 | 13 | I've got a config-hcl branch with lots of configuration improvements. It will be merged to master soon. 14 | 15 | See Also 16 | -------- 17 | A cool person has made an Ansible Playbook: https://gitlab.com/ciphermail/debops.remirror 18 | 19 | -------------------------------------------------------------------------------- /build_rpm: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | VERSION=0.0.4 5 | RELEASE=0 6 | 7 | PWD=`pwd` 8 | WORK=/tmp/less_shitty_rpmbuild_$$ 9 | 10 | go build -o remirror . 11 | 12 | rpmbuild --quiet --buildroot /tmp/$WORK \ 13 | -D "_version $VERSION" \ 14 | -D "_release $RELEASE" \ 15 | -D "_origin $PWD" \ 16 | -D "_sourcedir $WORK/source" \ 17 | -D "_rpmdir $WORK/rpm" \ 18 | -D "_builddir $WORK/build" \ 19 | -bb remirror.spec 20 | 21 | cp $WORK/rpm/*/*.rpm $PWD 22 | rm -rf $WORK 23 | 24 | -------------------------------------------------------------------------------- /deploy.properties: -------------------------------------------------------------------------------- 1 | artifact = remirror 2 | service = remirror 3 | install = /opt/remirror 4 | arch = x86_64 5 | 6 | user = web 7 | type = simple 8 | 9 | consul.service.name = remirror 10 | consul.service.port = 80 11 | consul.check.tcp = localhost:80 12 | 13 | execstart = /opt/remirror/remirror -listen :80 -data /opt/remirror/data 14 | 15 | # Post install command to allow remirror to listen on port 80 16 | post = /usr/sbin/setcap cap_net_bind_service=+ep /opt/remirror/remirror 17 | 18 | -------------------------------------------------------------------------------- /example.hcl: 
-------------------------------------------------------------------------------- 1 | listen = ":8084" 2 | data = "/var/remirror" 3 | 4 | mirrors { 5 | mirror { 6 | prefix = "/archlinux/" 7 | upstream = "https://mirrors.xmission.com" 8 | } 9 | 10 | mirror { 11 | prefix = "/centos/" 12 | upstream = "https://mirrors.xmission.com" 13 | } 14 | 15 | mirror { 16 | prefix = "/fedora/" 17 | upstream = "https://mirrors.xmission.com" 18 | } 19 | mirror { 20 | prefix = "/fedora-epel/" 21 | upstream = "https://mirrors.xmission.com" 22 | } 23 | mirror { 24 | prefix = "/golang/" 25 | upstream = "https://storage.googleapis.com" 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/yobert/remirror 2 | 3 | go 1.16 4 | 5 | require github.com/hashicorp/hcl v1.0.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= 4 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 5 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "io/ioutil" 7 | "log" 8 | "net/http" 9 | "net/url" 10 | "os" 11 | "path" 12 | "strings" 13 | "sync" 14 | "time" 15 | 16 | "github.com/hashicorp/hcl" 17 | ) 18 | 19 | const VERSION = "0.0.6" 20 | 21 | type Config struct { 22 | Listen string // HTTP listen address. 
":8084" 23 | Data string // Storage location for cached files. "/var/remirror" 24 | Mirrors []Mirror 25 | } 26 | type Mirror struct { 27 | // Prefix specifies a path that should be sent 28 | // to a certain upstream. E.g. "/archlinux/" 29 | Prefix string 30 | 31 | // Upstream specifies the upstream protocol and host. 32 | // You may also specify a path, in which case Prefix is 33 | // stripped from the incoming request, and what is left is 34 | // appended to the upstream path component. 35 | // 36 | // E.g. "https://mirrors.kernel.org" (/archlinux/somepackage will be preserved) 37 | // E.g. "http://mirror.cs.umn.edu/arch/" (/archlinux/thing will transform to /arch/thing) 38 | Upstream string 39 | 40 | // Upstreams specifies multiple Upstream entries. You can specify both (all will be used). 41 | Upstreams []string 42 | 43 | // Local should be used instead of Upstream for a locally served folder. 44 | // Incoming requests will have Prefix stripped off before being sent to Local. 45 | // E.g. 
"/home/you/localrepos/archlinux" 46 | Local string 47 | 48 | // If nil, default match set will be used 49 | Matches []Match 50 | } 51 | type Match struct { 52 | Prefix string 53 | Suffix string 54 | Skip bool // skip = true means this is a "don't match" rule 55 | } 56 | 57 | func (mirror Mirror) String() string { 58 | s := mirror.Local 59 | if s == "" { 60 | count := 0 61 | if mirror.Upstream != "" { 62 | s = mirror.Upstream 63 | count++ 64 | } 65 | if s == "" && len(mirror.Upstreams) > 0 { 66 | s = mirror.Upstreams[0] 67 | } 68 | count += len(mirror.Upstreams) 69 | if count > 1 { 70 | s += fmt.Sprintf(" (+ %d more...)", count-1) 71 | } 72 | } 73 | s += " " 74 | for i, m := range mirror.Matches { 75 | ss := m.Prefix + "*" + m.Suffix 76 | if m.Skip { 77 | ss += " skip" 78 | } 79 | if i+1 < len(mirror.Matches) { 80 | ss += ", " 81 | } 82 | s += ss 83 | } 84 | return fmt.Sprintf("%-20s » %s", mirror.Prefix, s) 85 | } 86 | 87 | var ( 88 | http_client = http.Client{} 89 | 90 | downloads_mu sync.Mutex 91 | downloads = map[string]*Download{} 92 | ) 93 | 94 | type Download struct { 95 | resp *http.Response 96 | 97 | tmp_path string 98 | tmp_done chan struct{} // will be closed when download is done and final bytes written 99 | } 100 | 101 | func (mirror Mirror) should_cache(path string) bool { 102 | // Special rules for Debian/Ubuntu 103 | if strings.HasSuffix(path, "/Packages.gz") || strings.HasSuffix(path, "/Sources.gz") { 104 | return false 105 | } 106 | 107 | // Special rules for Arch 108 | if strings.HasSuffix(path, ".abs.tar.gz") || 109 | strings.HasSuffix(path, ".db.tar.gz") || 110 | strings.HasSuffix(path, ".files.tar.gz") || 111 | strings.HasSuffix(path, ".links.tar.gz") { 112 | return false 113 | } 114 | 115 | // Use custom match rules? 
116 | if len(mirror.Matches) > 0 { 117 | for _, m := range mirror.Matches { 118 | if strings.HasPrefix(path, m.Prefix) && 119 | strings.HasSuffix(path, m.Suffix) { 120 | return !m.Skip 121 | } 122 | } 123 | return false 124 | } 125 | 126 | // Otherwise cache everything that looks like an archive. 127 | if strings.HasSuffix(path, ".xz") || 128 | strings.HasSuffix(path, ".gz") || 129 | strings.HasSuffix(path, ".bz2") || 130 | strings.HasSuffix(path, ".zip") || 131 | strings.HasSuffix(path, ".tgz") || 132 | strings.HasSuffix(path, ".rpm") || 133 | strings.HasSuffix(path, "-rpm.bin") || 134 | strings.HasSuffix(path, ".deb") || 135 | strings.HasSuffix(path, ".jar") || 136 | strings.HasSuffix(path, ".xz.sig") { 137 | return true 138 | } 139 | return false 140 | } 141 | 142 | func (mirror Mirror) CreateHandler(config *Config, fileserver http.Handler) (http.Handler, error) { 143 | 144 | if mirror.Local != "" { 145 | return http.StripPrefix(mirror.Prefix, http.FileServer(http.Dir(mirror.Local))), nil 146 | } 147 | 148 | upstreams := []*url.URL{} 149 | 150 | if mirror.Upstream != "" { 151 | upstream, err := url.Parse(mirror.Upstream) 152 | if err != nil { 153 | return nil, err 154 | } 155 | upstreams = append(upstreams, upstream) 156 | } 157 | for _, u := range mirror.Upstreams { 158 | upstream, err := url.Parse(u) 159 | if err != nil { 160 | return nil, err 161 | } 162 | upstreams = append(upstreams, upstream) 163 | } 164 | 165 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 166 | log.Println(r.Method + " http://" + r.Host + r.RequestURI) 167 | 168 | err := func() error { 169 | 170 | for _, upstream := range upstreams { 171 | 172 | local_path := "" 173 | remote_url := upstream.Scheme + "://" + upstream.Host 174 | 175 | // Ugh... This is not the right way to do this. 
176 | // I'm not sure how to make it encode + to %, 177 | // while not encoding / 178 | remote_url = strings.Replace(remote_url, "+", "%2B", -1) 179 | 180 | if upstream.Path == "" { 181 | remote_url += path.Clean(r.URL.Path) 182 | } else { 183 | remote_url += path.Clean(upstream.Path + "/" + strings.TrimPrefix(r.URL.Path, mirror.Prefix)) 184 | } 185 | 186 | if mirror.should_cache(remote_url) { 187 | local_path = config.Data + path.Clean(r.URL.Path) 188 | 189 | _, err := os.Stat(local_path) 190 | if err == nil { 191 | fileserver.ServeHTTP(w, r) 192 | return nil 193 | } 194 | } 195 | 196 | var download *Download 197 | var ok bool 198 | 199 | downloads_mu.Lock() 200 | 201 | if r.Header.Get("Range") == "" && local_path != "" { 202 | download, ok = downloads[local_path] 203 | if ok { 204 | fh, err := os.Open(download.tmp_path) 205 | downloads_mu.Unlock() 206 | if err != nil { 207 | return err 208 | } 209 | return tmp_download(local_path, w, download, fh) 210 | } 211 | } 212 | 213 | // downloads_mu is still locked. take care. 214 | // we need to keep it locked until we have 215 | // registered a download, opened a temp file, 216 | // and saved it's path into the tmp_path in 217 | // the struct. 218 | // then we need to make sure to release. 
219 | 220 | log.Println("-->", remote_url) 221 | 222 | req, err := http.NewRequest("GET", remote_url, nil) 223 | if err != nil { 224 | downloads_mu.Unlock() 225 | return err 226 | } 227 | 228 | for k, vs := range r.Header { 229 | if !hopHeaders[k] { 230 | for _, v := range vs { 231 | req.Header.Add(k, v) 232 | } 233 | } 234 | } 235 | 236 | resp, err := http_client.Do(req) 237 | if err != nil { 238 | downloads_mu.Unlock() 239 | return err 240 | } 241 | defer resp.Body.Close() 242 | 243 | // Try another mirror if we get certain status codes 244 | if resp.StatusCode == 404 || 245 | resp.StatusCode == 500 || 246 | resp.StatusCode == 503 { 247 | downloads_mu.Unlock() 248 | continue 249 | } 250 | 251 | out := io.Writer(w) 252 | 253 | tmp_path := "" 254 | 255 | var tmp_needs_final_close io.Closer 256 | 257 | // We don't want to cache the result if the server 258 | // returns with a 206 Partial Content 259 | if resp.StatusCode == 200 && local_path != "" { 260 | tmp, err := ioutil.TempFile(config.Data, "remirror_tmp_") 261 | if err != nil { 262 | downloads_mu.Unlock() 263 | return err 264 | } 265 | tmp_needs_final_close = tmp 266 | tmp_path = tmp.Name() 267 | //fmt.Println("tmp", tmp_path) 268 | 269 | defer tmp.Close() 270 | defer os.Remove(tmp_path) 271 | 272 | out = io.MultiWriter(out, tmp) 273 | 274 | // at this point we have a "successful" download in 275 | // progress. save into the struct. 276 | download = &Download{ 277 | resp: resp, 278 | tmp_path: tmp_path, 279 | tmp_done: make(chan struct{}), 280 | } 281 | downloads[local_path] = download 282 | } 283 | // release the mutex. if we have a successful download in 284 | // progress, we have stored it correctly so far. if not, 285 | // we unlock, leaving the download struct unmodified. the 286 | // next request to try that URL will retry. 287 | downloads_mu.Unlock() 288 | 289 | // however we quit, we want to clear the download in progress 290 | // entry. 
this deferred func should run before the deferred 291 | // cleanup funcs above, so the filehandle should still be 292 | // valid when we clear it out. 293 | defer func() { 294 | if download == nil { 295 | // we didn't end up using the map for some reason. 296 | // (maybe empty content length, non 200 response, etc) 297 | return 298 | } 299 | 300 | // make sure final close has been called. things might still 301 | // be writing, and we need that to be done before 302 | // we close tmp_done 303 | _ = tmp_needs_final_close.Close() 304 | 305 | close(download.tmp_done) 306 | 307 | downloads_mu.Lock() 308 | delete(downloads, local_path) 309 | downloads_mu.Unlock() 310 | }() 311 | 312 | write_resp_headers(w, resp) 313 | 314 | n, err := io.Copy(out, resp.Body) 315 | if err != nil { 316 | log.Println(err) 317 | return nil 318 | } 319 | 320 | if n != resp.ContentLength && resp.ContentLength != -1 { 321 | log.Printf("Short data returned from server (Content-Length %d received %d)\n", resp.ContentLength, n) 322 | 323 | // Not really an HTTP error, leave it up to the client. 324 | // but we aren't going to save our response to the cache. 
325 | return nil 326 | } 327 | 328 | if tmp_path != "" { 329 | os.MkdirAll(path.Dir(local_path), 0755) 330 | 331 | err = tmp_needs_final_close.Close() 332 | if err != nil { 333 | log.Println(err) 334 | return nil 335 | } 336 | 337 | // clear from struct before renaming 338 | if download != nil { 339 | close(download.tmp_done) 340 | downloads_mu.Lock() 341 | delete(downloads, local_path) 342 | downloads_mu.Unlock() 343 | download = nil // so we don't re-close 344 | } 345 | 346 | err = os.Rename(tmp_path, local_path) 347 | if err != nil { 348 | log.Println(err) 349 | return nil 350 | } 351 | log.Println(">:)") 352 | } 353 | 354 | return nil 355 | 356 | } 357 | 358 | return HTTPError(404) 359 | 360 | }() 361 | 362 | he, ok := err.(HTTPError) 363 | if ok { 364 | http.Error(w, he.Error(), he.Code()) 365 | fmt.Println("\t\t", he.Error()) 366 | } else if err != nil { 367 | http.Error(w, err.Error(), 500) 368 | fmt.Println("\t\t500 " + err.Error()) 369 | } 370 | }), nil 371 | } 372 | 373 | func load_configs(config *Config) error { 374 | try := []string{"remirror.hcl"} 375 | home := os.Getenv("HOME") 376 | if home != "" { 377 | try = append(try, home+"/.remirror.hcl") 378 | } 379 | try = append(try, "/etc/remirror.hcl") 380 | 381 | for _, t := range try { 382 | _, err := os.Stat(t) 383 | if err == nil { 384 | log.Printf("Loading configuration from %#v ...\n", t) 385 | config_bytes, err := ioutil.ReadFile(t) 386 | if err != nil { 387 | return err 388 | } 389 | if err := hcl.Unmarshal(config_bytes, config); err != nil { 390 | return err 391 | } 392 | return nil 393 | } 394 | } 395 | return fmt.Errorf("No files found: Create one of %s", strings.Join(try, ", ")) 396 | } 397 | 398 | func main() { 399 | for _, arg := range os.Args[1:] { 400 | if arg == "--version" { 401 | fmt.Println("remirror", VERSION) 402 | os.Exit(0) 403 | } 404 | fmt.Println("Unhandled argument", arg) 405 | os.Exit(1) 406 | } 407 | 408 | config := &Config{} 409 | 410 | if err := load_configs(config); err != 
nil { 411 | log.Fatalf("Config error: %v", err) 412 | } 413 | 414 | fileserver := http.FileServer(http.Dir(config.Data)) 415 | 416 | for _, mirror := range config.Mirrors { 417 | handler, err := mirror.CreateHandler(config, fileserver) 418 | if err == nil { 419 | log.Println(mirror, " ✓ ") 420 | http.Handle(mirror.Prefix, handler) 421 | } else { 422 | log.Println(mirror, " ✗ Error:", err) 423 | } 424 | } 425 | 426 | log.Println("remirror listening on HTTP", config.Listen, "with data cache", config.Data) 427 | log.Fatal(http.ListenAndServe(config.Listen, nil)) 428 | } 429 | 430 | func write_resp_headers(w http.ResponseWriter, resp *http.Response) { 431 | 432 | for k, vs := range resp.Header { 433 | if k == "Accept-Ranges" { 434 | continue 435 | } 436 | for _, v := range vs { 437 | //fmt.Printf("proxy back header %#v\t%#v\n", k, v) 438 | w.Header().Add(k, v) 439 | } 440 | } 441 | 442 | w.Header().Set("Server", "remirror") 443 | w.WriteHeader(resp.StatusCode) 444 | } 445 | 446 | // return a download in progress started by another request 447 | func tmp_download(local_path string, w http.ResponseWriter, download *Download, tmp io.ReadCloser) error { 448 | defer tmp.Close() 449 | 450 | write_resp_headers(w, download.resp) 451 | 452 | written := int64(0) 453 | done := false 454 | last := time.Now() 455 | 456 | for { 457 | n, err := io.Copy(w, tmp) 458 | 459 | if n < 0 { 460 | panic(fmt.Sprintf("io.Copy returned n %d: Not what I expected!", n)) 461 | } 462 | 463 | written += n 464 | 465 | if err != nil && err != io.EOF { 466 | log.Printf("Error while reading concurrent download %#s from %#s: %v\n", 467 | local_path, download.tmp_path, err) 468 | // Not an HTTP error: just return, and the client will hopefully 469 | // handle a short read correctly. 470 | return nil 471 | } 472 | 473 | if n > 0 { 474 | // cool, try another copy. 
hopefully the file 475 | // has more bytes now 476 | last = time.Now() 477 | continue 478 | } 479 | 480 | if done { 481 | return nil 482 | } 483 | 484 | // sleep for a bit so the other download has a chance to write 485 | // more bytes. 486 | select { 487 | case <-time.After(time.Second): 488 | // 60 second timeout for the other goroutine to at least write _something_ 489 | if time.Since(last) > time.Minute { 490 | log.Println("Timeout while reading concurrent download %#s from %#s\n", 491 | local_path, 492 | download.tmp_path) 493 | // Not an HTTP error: just return, and the client will hopefully 494 | // handle a short read correctly. 495 | return nil 496 | } 497 | continue 498 | case <-download.tmp_done: 499 | done = true 500 | continue 501 | } 502 | } 503 | } 504 | -------------------------------------------------------------------------------- /misc.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | ) 7 | 8 | // Hop-by-hop headers. These are removed when sent upstream. 
// HTTPError is an error whose value is the HTTP status code to send to the
// client.
type HTTPError int

// Error formats the status as "HTTP <code> <reason phrase>". Codes for which
// net/http knows no reason phrase are rendered as "HTTP <code>" without a
// trailing space.
func (e HTTPError) Error() string {
	text := http.StatusText(e.Code())
	if text == "" {
		return fmt.Sprintf("HTTP %d", e.Code())
	}
	return fmt.Sprintf("HTTP %d %s", e.Code(), text)
}

// Code returns the HTTP status code as a plain int.
func (e HTTPError) Code() int {
	return int(e)
}
/usr/sbin/setcap cap_net_bind_service=+ep /usr/bin/remirror 37 | 38 | --------------------------------------------------------------------------------