├── .github ├── FUNDING.yml └── workflows │ ├── release.yml │ └── test.yml ├── .golangci.yml ├── Dockerfile ├── LICENSE ├── README.md ├── cmd └── serve │ └── main.go ├── go.mod ├── go.sum ├── service.go └── service_test.go /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [ysmood] 2 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | 8 | jobs: 9 | docker: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3 14 | 15 | - name: Docker meta 16 | id: meta 17 | uses: docker/metadata-action@v4 18 | with: 19 | images: | 20 | ghcr.io/go-rod/bartender 21 | tags: | 22 | type=semver,pattern={{version}} 23 | 24 | - name: Login 25 | uses: docker/login-action@v2 26 | with: 27 | registry: ghcr.io 28 | username: ${{ github.actor }} 29 | password: ${{ secrets.GITHUB_TOKEN }} 30 | 31 | - name: Build and push 32 | uses: docker/build-push-action@v4 33 | with: 34 | context: . 35 | push: true 36 | tags: ${{ steps.meta.outputs.tags }} 37 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - "**" 7 | 8 | pull_request: 9 | 10 | jobs: 11 | test-linux: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/setup-go@v4 16 | with: 17 | go-version: 1.20.1 18 | 19 | - uses: actions/checkout@v3 20 | 21 | - run: go run github.com/ysmood/golangci-lint@latest 22 | 23 | - run: go test ./... 
24 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | skip-dirs-use-default: false 3 | 4 | linters: 5 | enable-all: true 6 | disable: 7 | - wsl 8 | - contextcheck 9 | - gomnd 10 | - gochecknoinits 11 | - paralleltest 12 | - wrapcheck 13 | - gosec 14 | - gochecknoglobals 15 | - musttag 16 | - revive 17 | - varnamelen 18 | - depguard 19 | 20 | # Deprecated ones: 21 | - structcheck 22 | - interfacer 23 | - deadcode 24 | - varcheck 25 | - ifshort 26 | - exhaustivestruct 27 | - golint 28 | - maligned 29 | - nosnakecase 30 | - scopelint 31 | 32 | gocyclo: 33 | min-complexity: 15 34 | 35 | issues: 36 | exclude-use-default: false 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang as go 2 | 3 | COPY . /app 4 | WORKDIR /app 5 | RUN CGO_ENABLED=0 go build ./cmd/serve 6 | 7 | FROM ghcr.io/go-rod/rod:v0.113.4 8 | 9 | RUN mkdir /app 10 | WORKDIR /app 11 | COPY --from=go /app/serve ./ 12 | 13 | EXPOSE 3000 14 | 15 | CMD ./serve -p :3000 -t http://localhost:8080 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright 2023 Yad Smood 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in 
all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | It's designed to make SEO for single-page applications easier, so that you don't have to use server-side rendering tricks. 4 | 5 | It acts like a transparent HTTP proxy by default; it only activates when the client looks like a web crawler, such as Googlebot, Baiduspider, etc. 6 | 7 | ## Installation 8 | 9 | You can simply use bartender as the gateway in front of your web server: 10 | 11 | ```bash 12 | docker run -p 3000:3000 ghcr.io/go-rod/bartender ./serve -p :3000 -t http://your-web-server:8080 13 | ``` 14 | 15 | A common data flow looks like this: 16 | 17 | ```mermaid 18 | graph TD; 19 | C[Client]-->B; 20 | 21 | subgraph B[Bartender] 22 | J{Is web crawler?}; 23 | J-->|Yes|R[Render with headless browser]; 24 | J-->|No|D[Transparent proxy]; 25 | end 26 | 27 | R-->H[Your web server]; 28 | D-->H; 29 | ``` 30 | 31 | If you want the best performance, you can install bartender behind a gateway like nginx, and configure the gateway to proxy the request to bartender when the client looks like a web crawler. 32 | 33 | A common way to detect web crawlers: [link](https://stackoverflow.com/a/2517444/1089063). 
34 | 35 | A common data flow looks like this: 36 | 37 | ```mermaid 38 | graph TD; 39 | C[Client]-->T[Gateway]; 40 | T-->J{Is web crawler?}; 41 | J-->|Yes|B[Bartender]; 42 | J-->|No|H[Your web server]; 43 | B-->H; 44 | ``` 45 | -------------------------------------------------------------------------------- /cmd/serve/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "log" 6 | "net/http" 7 | "strings" 8 | "time" 9 | 10 | "github.com/go-rod/bartender" 11 | ) 12 | 13 | func main() { 14 | port := flag.String("p", ":3001", "port to listen on") 15 | target := flag.String("t", "", "target url to proxy") 16 | size := flag.Int("s", 2, "size of the pool") 17 | maxWait := flag.Duration("w", 3*time.Second, "max wait time for a page rendering") 18 | autoFree := flag.Duration("f", 10*time.Minute, "auto close each headless browser after the specified time") 19 | 20 | var bypassUAs StringsFlag = bartender.DefaultBypassUserAgentNames 21 | flag.Var(&bypassUAs, "u", "bypass the specified user-agent names") 22 | 23 | var blockList StringsFlag 24 | flag.Var(&blockList, "b", 25 | "block the requests that match the pattern, such as 'https://a.com/*', can set multiple ones") 26 | 27 | flag.Parse() 28 | 29 | if *target == "" { 30 | panic("cli option -t required") 31 | } 32 | 33 | log.Printf("Bartender started %s -> %s\n", *port, *target) 34 | log.Printf("Block list: %v\n", blockList) 35 | log.Printf("Bypass user-agent names: %v\n", bypassUAs) 36 | 37 | b := bartender.New(*port, *target, *size) 38 | b.BlockRequests(blockList...) 39 | b.BypassUserAgentNames(bypassUAs...) 
40 | b.MaxWait(*maxWait) 41 | b.WarmUp() 42 | b.AutoFree(*autoFree) 43 | 44 | err := http.ListenAndServe(*port, b) 45 | if err != nil { 46 | log.Fatalln(err) 47 | } 48 | } 49 | 50 | type StringsFlag []string 51 | 52 | func (i *StringsFlag) String() string { 53 | return strings.Join(*i, ", ") 54 | } 55 | 56 | func (i *StringsFlag) Set(value string) error { 57 | *i = append(*i, value) 58 | 59 | return nil 60 | } 61 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/go-rod/bartender 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/go-rod/rod v0.114.1 7 | github.com/mileusna/useragent v1.3.3 8 | github.com/ysmood/got v0.34.1 9 | ) 10 | 11 | require ( 12 | github.com/ysmood/fetchup v0.2.3 // indirect 13 | github.com/ysmood/goob v0.4.0 // indirect 14 | github.com/ysmood/gop v0.0.2 // indirect 15 | github.com/ysmood/gson v0.7.3 // indirect 16 | github.com/ysmood/leakless v0.8.0 // indirect 17 | ) 18 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/go-rod/rod v0.114.1 h1:osBWr88guzTXAIzwJWVmGZe3/utT9+lqKjkGSBsYMxw= 2 | github.com/go-rod/rod v0.114.1/go.mod h1:aiedSEFg5DwG/fnNbUOTPMTTWX3MRj6vIs/a684Mthw= 3 | github.com/mileusna/useragent v1.3.3 h1:hrIVmPevJY3ICS1Ob4yjqJToQiv2eD9iHaJBjxMihWY= 4 | github.com/mileusna/useragent v1.3.3/go.mod h1:3d8TOmwL/5I8pJjyVDteHtgDGcefrFUX4ccGOMKNYYc= 5 | github.com/ysmood/fetchup v0.2.3 h1:ulX+SonA0Vma5zUFXtv52Kzip/xe7aj4vqT5AJwQ+ZQ= 6 | github.com/ysmood/fetchup v0.2.3/go.mod h1:xhibcRKziSvol0H1/pj33dnKrYyI2ebIvz5cOOkYGns= 7 | github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ= 8 | github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18= 9 | github.com/ysmood/gop v0.0.2 
h1:VuWweTmXK+zedLqYufJdh3PlxDNBOfFHjIZlPT2T5nw= 10 | github.com/ysmood/gop v0.0.2/go.mod h1:rr5z2z27oGEbyB787hpEcx4ab8cCiPnKxn0SUHt6xzk= 11 | github.com/ysmood/got v0.34.1 h1:IrV2uWLs45VXNvZqhJ6g2nIhY+pgIG1CUoOcqfXFl1s= 12 | github.com/ysmood/got v0.34.1/go.mod h1:yddyjq/PmAf08RMLSwDjPyCvHvYed+WjHnQxpH851LM= 13 | github.com/ysmood/gotrace v0.6.0 h1:SyI1d4jclswLhg7SWTL6os3L1WOKeNn/ZtzVQF8QmdY= 14 | github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM= 15 | github.com/ysmood/gson v0.7.3 h1:QFkWbTH8MxyUTKPkVWAENJhxqdBa4lYTQWqZCiLG6kE= 16 | github.com/ysmood/gson v0.7.3/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= 17 | github.com/ysmood/leakless v0.8.0 h1:BzLrVoiwxikpgEQR0Lk8NyBN5Cit2b1z+u0mgL4ZJak= 18 | github.com/ysmood/leakless v0.8.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ= 19 | -------------------------------------------------------------------------------- /service.go: -------------------------------------------------------------------------------- 1 | // Package bartender is a service to make web crawlers consume webpages easier 2 | package bartender 3 | 4 | import ( 5 | "context" 6 | "log" 7 | "net/http" 8 | "net/http/httputil" 9 | "net/url" 10 | "strings" 11 | "sync" 12 | "time" 13 | 14 | "github.com/go-rod/rod" 15 | "github.com/go-rod/rod/lib/launcher" 16 | "github.com/go-rod/rod/lib/proto" 17 | "github.com/mileusna/useragent" 18 | ) 19 | 20 | var DefaultBypassUserAgentNames = []string{ 21 | useragent.Opera, 22 | useragent.OperaMini, 23 | useragent.OperaTouch, 24 | useragent.Chrome, 25 | useragent.HeadlessChrome, 26 | useragent.Firefox, 27 | useragent.InternetExplorer, 28 | useragent.Safari, 29 | useragent.Edge, 30 | useragent.Vivaldi, 31 | } 32 | 33 | type Bartender struct { 34 | addr string 35 | target *url.URL 36 | proxy *httputil.ReverseProxy 37 | bypassList map[string]bool 38 | pool rod.PagePool 39 | blockRequests []string 40 | maxWait time.Duration 41 | } 42 | 43 | func New(addr, target string, 
poolSize int) *Bartender { 44 | u, err := url.Parse(target) 45 | if err != nil { 46 | panic(err) 47 | } 48 | 49 | proxy := httputil.NewSingleHostReverseProxy(u) 50 | 51 | return &Bartender{ 52 | addr: addr, 53 | target: u, 54 | proxy: proxy, 55 | bypassList: strToMap(DefaultBypassUserAgentNames), 56 | pool: rod.NewPagePool(poolSize), 57 | blockRequests: []string{}, 58 | maxWait: 3 * time.Second, 59 | } 60 | } 61 | 62 | func (b *Bartender) BypassUserAgentNames(list ...string) { 63 | b.bypassList = strToMap(list) 64 | } 65 | 66 | func (b *Bartender) BlockRequests(patterns ...string) { 67 | b.blockRequests = patterns 68 | } 69 | 70 | // MaxWait sets the max wait time for the headless browser to render the page. 71 | // If the max wait time is reached, bartender will stop waiting for page rendering and 72 | // immediately return the current html. 73 | func (b *Bartender) MaxWait(d time.Duration) { 74 | b.maxWait = d 75 | } 76 | 77 | func (b *Bartender) getPage() *rod.Page { 78 | return b.pool.Get(b.newPage) 79 | } 80 | 81 | func (b *Bartender) newPage() *rod.Page { 82 | l := launcher.New() 83 | go l.Cleanup() 84 | 85 | page := rod.New().ControlURL(l.MustLaunch()).MustConnect().MustPage() 86 | 87 | if len(b.blockRequests) > 0 { 88 | router := page.HijackRequests() 89 | 90 | for _, pattern := range b.blockRequests { 91 | router.MustAdd(pattern, func(ctx *rod.Hijack) { 92 | ctx.Response.Fail(proto.NetworkErrorReasonBlockedByClient) 93 | }) 94 | } 95 | 96 | go router.Run() 97 | } 98 | 99 | log.Println("headless browser started:", page.SessionID) 100 | 101 | return page 102 | } 103 | 104 | // WarmUp pre-creates the headless browsers. 105 | func (b *Bartender) WarmUp() { 106 | for i := 0; i < len(b.pool); i++ { 107 | b.pool.Put(b.getPage()) 108 | } 109 | } 110 | 111 | // AutoFree automatically closes the each headless browser after a period of time. 112 | // It prevent the memory leak of the headless browser. 
113 | func (b *Bartender) AutoFree(interval time.Duration) { 114 | go func() { 115 | for { 116 | time.Sleep(interval) 117 | 118 | page := b.getPage() 119 | browser := page.Browser() 120 | 121 | err := browser.Close() 122 | if err != nil { 123 | log.Println("failed to close browser:", err) 124 | 125 | continue 126 | } 127 | b.pool.Put(nil) 128 | log.Println("headless browser freed:", page.SessionID) 129 | } 130 | }() 131 | } 132 | 133 | func (b *Bartender) ServeHTTP(w http.ResponseWriter, r *http.Request) { 134 | ua := useragent.Parse(r.Header.Get("User-Agent")) 135 | if r.Method != http.MethodGet || b.bypassList[ua.Name] { 136 | b.proxy.ServeHTTP(w, r) 137 | 138 | return 139 | } 140 | 141 | if b.RenderPage(w, r) { 142 | return 143 | } 144 | 145 | b.proxy.ServeHTTP(w, r) 146 | } 147 | 148 | // RenderPage returns true if the page is rendered by the headless browser. 149 | func (b *Bartender) RenderPage(w http.ResponseWriter, r *http.Request) bool { 150 | u := b.getTargetURL(r.URL) 151 | 152 | statusCode, resHeader := getHeader(r.Context(), u) 153 | 154 | if !htmlContentType(resHeader) { 155 | return false 156 | } 157 | 158 | log.Println("headless render:", u) 159 | 160 | for k, vs := range resHeader { 161 | if k == "Content-Length" { 162 | continue 163 | } 164 | 165 | for _, v := range vs { 166 | w.Header().Add(k, v) 167 | } 168 | } 169 | 170 | w.WriteHeader(statusCode) 171 | 172 | page := b.getPage() 173 | defer b.pool.Put(page) 174 | 175 | page, cancel := page.Context(r.Context()).WithCancel() 176 | 177 | once := sync.Once{} 178 | 179 | go func() { 180 | time.Sleep(b.maxWait) 181 | once.Do(func() { 182 | log.Println("max wait time reached, return current html:", u) 183 | body, _ := page.HTML() 184 | _, _ = w.Write([]byte(body)) 185 | cancel() 186 | }) 187 | }() 188 | 189 | _ = page.Navigate(u) 190 | 191 | _ = page.WaitStable(time.Second) 192 | 193 | body, _ := page.HTML() 194 | 195 | once.Do(func() { 196 | log.Println("headless render done:", u) 197 | _, _ = 
w.Write([]byte(body)) 198 | }) 199 | 200 | return true 201 | } 202 | 203 | func (b *Bartender) getTargetURL(reqURL *url.URL) string { 204 | u := *reqURL 205 | u.Scheme = b.target.Scheme 206 | u.Host = b.target.Host 207 | 208 | return u.String() 209 | } 210 | 211 | func getHeader(ctx context.Context, u string) (int, http.Header) { 212 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) 213 | if err != nil { 214 | panic(err) 215 | } 216 | 217 | res, err := http.DefaultClient.Do(req) 218 | if err != nil { 219 | panic(err) 220 | } 221 | 222 | _ = res.Body.Close() 223 | 224 | return res.StatusCode, res.Header 225 | } 226 | 227 | func htmlContentType(h http.Header) bool { 228 | return strings.Contains(h.Get("Content-Type"), "text/html") 229 | } 230 | 231 | func strToMap(list []string) map[string]bool { 232 | m := map[string]bool{} 233 | for _, s := range list { 234 | m[s] = true 235 | } 236 | 237 | return m 238 | } 239 | -------------------------------------------------------------------------------- /service_test.go: -------------------------------------------------------------------------------- 1 | package bartender_test 2 | 3 | import ( 4 | "net/http" 5 | "testing" 6 | 7 | "github.com/go-rod/bartender" 8 | "github.com/ysmood/got" 9 | ) 10 | 11 | func TestBasic(t *testing.T) { 12 | g := got.T(t) 13 | 14 | website := g.Serve() 15 | 16 | website.Route("/a.png", ".png", "image") 17 | website.Route("/", ".html", ` 18 |
19 | 24 | `) 25 | 26 | proxy := g.Serve() 27 | 28 | bt := bartender.New("", website.URL(), 2) 29 | 30 | proxy.Mux.HandleFunc("/", bt.ServeHTTP) 31 | 32 | { 33 | //nolint: lll 34 | // browser 35 | res := g.Req("", proxy.URL("/test?q=ok"), http.Header{"User-Agent": {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}}) 36 | g.Has(res.String(), "") 37 | } 38 | 39 | { 40 | // web crawler 41 | res := g.Req("", proxy.URL("/test?q=ok")) 42 | g.Has(res.String(), "/test?q=ok") 43 | } 44 | 45 | { 46 | // can get image 47 | res := g.Req("", proxy.URL("/a.png")) 48 | g.Has(res.String(), "image") 49 | } 50 | } 51 | --------------------------------------------------------------------------------