├── .github ├── dependabot.yml └── workflows │ ├── auto-merge-dependabot.yml │ ├── build_docker_images.yml │ ├── codeql-analysis.yml │ ├── go.yml │ ├── golangci-lint.yml │ ├── release.yml │ └── update.yml ├── .golangci.yml ├── .goreleaser.yaml ├── Dockerfile ├── LICENSE ├── Readme.md ├── Taskfile.yml ├── go.mod ├── go.sum └── main.go /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gomod" 9 | directory: "/" 10 | schedule: 11 | interval: "weekly" 12 | 13 | - package-ecosystem: "github-actions" 14 | directory: "/" 15 | schedule: 16 | # Check for updates to GitHub Actions every weekday 17 | interval: "daily" 18 | 19 | - package-ecosystem: docker 20 | directory: "/" 21 | schedule: 22 | interval: "weekly" 23 | 24 | - package-ecosystem: "devcontainers" 25 | directory: "/" 26 | schedule: 27 | interval: "weekly" 28 | -------------------------------------------------------------------------------- /.github/workflows/auto-merge-dependabot.yml: -------------------------------------------------------------------------------- 1 | name: Auto-merge dependabot updates 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | 7 | permissions: 8 | pull-requests: write 9 | contents: write 10 | 11 | jobs: 12 | 13 | dependabot-merge: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | if: ${{ github.actor == 'dependabot[bot]' }} 18 | 19 | steps: 20 | - name: Dependabot metadata 21 | id: metadata 22 | uses: dependabot/fetch-metadata@v2.4.0 23 | with: 24 | github-token: "${{ secrets.GITHUB_TOKEN }}" 25 | 26 | - name: Enable auto-merge for Dependabot PRs 
27 | # Only if version bump is not a major version change 28 | if: ${{steps.metadata.outputs.update-type != 'version-update:semver-major'}} 29 | run: gh pr merge --auto --merge "$PR_URL" 30 | env: 31 | PR_URL: ${{github.event.pull_request.html_url}} 32 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 33 | -------------------------------------------------------------------------------- /.github/workflows/build_docker_images.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker Images 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | schedule: 9 | - cron: "0 0 * * *" 10 | 11 | jobs: 12 | build_docker_images: 13 | timeout-minutes: 30 14 | runs-on: ubuntu-latest 15 | permissions: 16 | contents: read 17 | packages: write 18 | 19 | steps: 20 | - name: checkout sources 21 | uses: actions/checkout@v4 22 | 23 | - name: Set up QEMU 24 | uses: docker/setup-qemu-action@v3 25 | 26 | - name: Set up Docker Buildx 27 | uses: docker/setup-buildx-action@v3 28 | 29 | - name: Login to Docker Hub 30 | uses: docker/login-action@v3.4.0 31 | with: 32 | username: ${{ secrets.DOCKERHUB_USERNAME }} 33 | password: ${{ secrets.DOCKERHUB_TOKEN }} 34 | 35 | - name: Login to GitHub Container Registry 36 | uses: docker/login-action@v3.4.0 37 | with: 38 | registry: ghcr.io 39 | username: ${{ github.repository_owner }} 40 | password: ${{ secrets.GITHUB_TOKEN }} 41 | 42 | - name: Build and push 43 | uses: docker/build-push-action@v6 44 | with: 45 | push: true 46 | # platforms: linux/amd64,linux/arm/v7,linux/arm64/v8,linux/386,linux/ppc64le 47 | tags: | 48 | firefart/gochro:latest 49 | ghcr.io/firefart/gochro:latest 50 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your 
repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | schedule: 21 | - cron: '35 20 * * 0' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'go' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v4 42 | 43 | - uses: actions/setup-go@v5 44 | with: 45 | go-version: "stable" 46 | 47 | # Initializes the CodeQL tools for scanning. 48 | - name: Initialize CodeQL 49 | uses: github/codeql-action/init@v3 50 | with: 51 | languages: ${{ matrix.language }} 52 | # If you wish to specify custom queries, you can do so here or in a config file. 53 | # By default, queries listed here will override any specified in a config file. 54 | # Prefix the list here with "+" to use these queries and those in the config file. 55 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 56 | 57 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 58 | # If this step fails, then you should remove it and run the build manually (see below) 59 | - name: Autobuild 60 | uses: github/codeql-action/autobuild@v3 61 | 62 | # ℹ️ Command-line programs to run using the OS shell. 
63 | # 📚 https://git.io/JvXDl 64 | 65 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 66 | # and modify them (or add more) to build your code if your project 67 | # uses a compiled language 68 | 69 | #- run: | 70 | # make bootstrap 71 | # make release 72 | 73 | - name: Perform CodeQL Analysis 74 | uses: github/codeql-action/analyze@v3 75 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | name: Build 6 | timeout-minutes: 30 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Check out code 10 | uses: actions/checkout@v4 11 | 12 | - name: Set up Go 13 | uses: actions/setup-go@v5 14 | with: 15 | go-version: "stable" 16 | 17 | - name: Install Task 18 | uses: arduino/setup-task@v2 19 | with: 20 | repo-token: ${{ secrets.GITHUB_TOKEN }} 21 | 22 | - name: Get dependencies 23 | run: | 24 | go get -v -t -d ./... 
25 | 26 | - name: Build 27 | run: task build 28 | 29 | - name: Test 30 | run: task test 31 | -------------------------------------------------------------------------------- /.github/workflows/golangci-lint.yml: -------------------------------------------------------------------------------- 1 | name: golangci-lint 2 | on: [push, workflow_dispatch] 3 | jobs: 4 | golangci: 5 | name: lint 6 | timeout-minutes: 30 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | 11 | - uses: actions/setup-go@v5 12 | with: 13 | go-version: "stable" 14 | 15 | - name: golangci-lint 16 | uses: golangci/golangci-lint-action@v8 17 | with: 18 | version: latest 19 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: goreleaser 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | goreleaser: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Fetch all tags 21 | run: git fetch --force --tags 22 | 23 | - name: Set up Go 24 | uses: actions/setup-go@v5 25 | with: 26 | go-version: "stable" 27 | 28 | - name: Run GoReleaser 29 | uses: goreleaser/goreleaser-action@v6.3.0 30 | with: 31 | distribution: goreleaser 32 | version: latest 33 | args: release --clean 34 | env: 35 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 36 | -------------------------------------------------------------------------------- /.github/workflows/update.yml: -------------------------------------------------------------------------------- 1 | name: Update 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "0 12 * * *" 7 | 8 | jobs: 9 | update: 10 | timeout-minutes: 30 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: checkout 14 | uses: actions/checkout@v4 15 | with: 16 | token: ${{ 
secrets.PERSONAL_ACCESS_TOKEN_UPDATE }} 17 | 18 | - name: Set up Go 19 | uses: actions/setup-go@v5 20 | with: 21 | go-version: "stable" 22 | 23 | - name: Install Task 24 | uses: arduino/setup-task@v2 25 | with: 26 | repo-token: ${{ secrets.GITHUB_TOKEN }} 27 | 28 | - name: update 29 | run: | 30 | task update 31 | 32 | - name: setup git config 33 | run: | 34 | git config user.name "Github" 35 | git config user.email "<>" 36 | 37 | - name: commit changes 38 | # need to override the default shell so we can check 39 | # for error codes. Otherwise it will always fail if 40 | # one command returns an error code other than 0 41 | shell: bash --noprofile --norc -o pipefail {0} 42 | run: | 43 | git diff-index --quiet HEAD -- 44 | exit_status=$? 45 | if [ $exit_status -eq 0 ]; then 46 | echo "nothing has changed" 47 | else 48 | git add go.mod go.sum 49 | git commit -m "auto update from github actions" 50 | git push origin main 51 | fi 52 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | linters: 3 | enable: 4 | - nonamedreturns 5 | exclusions: 6 | generated: lax 7 | presets: 8 | - comments 9 | - common-false-positives 10 | - legacy 11 | - std-error-handling 12 | paths: 13 | - third_party$ 14 | - builtin$ 15 | - examples$ 16 | formatters: 17 | exclusions: 18 | generated: lax 19 | paths: 20 | - third_party$ 21 | - builtin$ 22 | - examples$ 23 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | # This is an example .goreleaser.yml file with some sensible defaults. 2 | # Make sure to check the documentation at https://goreleaser.com 3 | 4 | # The lines below are called `modelines`. See `:help modeline` 5 | # Feel free to remove those if you don't want/need to use them. 
6 | # yaml-language-server: $schema=https://goreleaser.com/static/schema.json 7 | # vim: set ts=2 sw=2 tw=0 fo=cnqoj 8 | 9 | version: 2 10 | 11 | before: 12 | hooks: 13 | # You may remove this if you don't use go modules. 14 | - go mod tidy 15 | # you may remove this if you don't need go generate 16 | - go generate ./... 17 | 18 | builds: 19 | - env: 20 | - CGO_ENABLED=0 21 | goos: 22 | - linux 23 | - windows 24 | - darwin 25 | 26 | archives: 27 | - format: tar.gz 28 | # this name template makes the OS and Arch compatible with the results of `uname`. 29 | name_template: >- 30 | {{ .ProjectName }}_ 31 | {{- title .Os }}_ 32 | {{- if eq .Arch "amd64" }}x86_64 33 | {{- else if eq .Arch "386" }}i386 34 | {{- else }}{{ .Arch }}{{ end }} 35 | {{- if .Arm }}v{{ .Arm }}{{ end }} 36 | # use zip for windows archives 37 | format_overrides: 38 | - goos: windows 39 | format: zip 40 | 41 | changelog: 42 | sort: asc 43 | filters: 44 | exclude: 45 | - "^docs:" 46 | - "^test:" 47 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:latest AS build-env 2 | WORKDIR /src 3 | ENV CGO_ENABLED=0 4 | COPY go.* /src/ 5 | RUN go mod download 6 | COPY main.go . 7 | RUN go build -a -o gochro -ldflags="-s -w" -trimpath 8 | 9 | FROM alpine:latest 10 | 11 | RUN apk add --no-cache chromium \ 12 | && rm -rf /var/cache/apk \ 13 | && mkdir -p /var/cache/apk 14 | 15 | RUN mkdir -p /app \ 16 | && adduser -D chrome \ 17 | && chown -R chrome:chrome /app 18 | 19 | USER chrome 20 | WORKDIR /app 21 | 22 | ENV CHROME_BIN=/usr/bin/chromium-browser \ 23 | CHROME_PATH=/usr/lib/chromium/ 24 | 25 | COPY --from=build-env /src/gochro . 
26 | 27 | ENTRYPOINT [ "./gochro" ] 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Christian Mehlmauer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # gochro 2 | 3 | gochro is a small docker image with chromium installed and a golang based webserver to interact with it. It can be used to take screenshots of websites using chromium-headless and convert HTML pages to PDF. 4 | 5 | If errors occur the error will be logged to stdout and a non information leaking error message is presented to the user. 
6 | 7 | This project was used on [https://wpscan.io](https://wpscan.io) for taking website screenshots and to generate PDF reports. 8 | 9 | ## Screenshot 10 | 11 | This URL takes a screenshot of [https://firefart.at](https://firefart.at) with a resolution of 1024x768 and returns an image. 12 | 13 | [http://localhost:8080/screenshot?url=https://firefart.at&w=1024&h=768](http://localhost:8080/screenshot?url=https://firefart.at&w=1024&h=768) 14 | 15 | ## HTML 2 PDF 16 | 17 | Send a POST request with the HTML you want to convert in the POST body to the following URL. 18 | 19 | [http://localhost:8080/html2pdf?w=1024&h=768](http://localhost:8080/html2pdf?w=1024&h=768) 20 | 21 | This will return a PDF of the HTML input. 22 | 23 | Example: 24 | 25 | ```text 26 | POST /html2pdf?w=1024&h=768 HTTP/1.1 27 | Host: localhost:8000 28 | Content-Type: application/x-www-form-urlencoded 29 | Content-Length: 119 30 | 31 | <html> 32 | <head><title>Test Page</title></head> 33 | <body> 34 | <h1>This is a test</h1> 35 | <p>This is a test</p> 36 | </body> 37 | </html> 38 | ``` 39 | 40 | Example as curl: 41 | 42 | ```text 43 | curl -s -k -X 'POST' -o test.pdf --data-binary '<html><body><h1>test</h1></body></html>' 'http://127.0.0.1:8000/html2pdf' 44 | ``` 45 | 46 | ## URL 2 PDF 47 | 48 | Send a GET request to the following URL to get the response as PDF. 49 | 50 | [http://localhost:8080/url2pdf?url=https://firefart.at&w=1024&h=768](http://localhost:8080/url2pdf?url=https://firefart.at&w=1024&h=768) 51 | 52 | ## Run server 53 | 54 | To run this image you should use the [seccomp profile](https://github.com/jessfraz/dotfiles/blob/master/etc/docker/seccomp/chrome.json) provided by [Jess Frazelle](https://github.com/jessfraz). The privileges on the host are needed for Chromium's internal security sandbox. You can also deactivate the sandbox on Chromium (using the `-disable-sandbox` flag), but that's a bad idea and puts your server at risk, so please use the seccomp profile instead. 55 | 56 | Be sure to use the `--init` switch to get rid of zombie processes of Chromium. 57 | 58 | ### Command Line Options 59 | ```text 60 | -host The host and port to listen on (refers to inside the container). Defaults to 127.0.0.1:8080 61 | -debug Enables debug output. Default: false 62 | -ignore-cert-errors Also fetch resources from origins with untrusted certificates or cert errors. 63 | -proxy Use a proxy server to connect to the internet. Please use format IP:PORT without a protocol. Example: 1.2.3.4:3128 64 | ``` 65 | 66 | ### Use the docker hub image 67 | 68 | You can also use the [prebuilt image](https://hub.docker.com/r/firefart/gochro) from Docker Hub. 69 | 70 | To pull the image run 71 | 72 | ```bash 73 | docker pull firefart/gochro 74 | ``` 75 | 76 | ### Include in docker-compose 77 | 78 | If you want to include this image in a docker-compose file you can use the following example. Just connect the `gochronet` to the other service so the containers can communicate with each other. 79 | 80 | Please note that the `0.0.0.0` in the command only applies to the network inside the docker container itself. If you want to access it from your local machine you need to add a port mapping. 
81 | 82 | ```yml 83 | version: '3.7' 84 | 85 | services: 86 | gochro: 87 | image: firefart/gochro 88 | init: true 89 | container_name: gochro 90 | security_opt: 91 | - seccomp="chrome.json" 92 | command: -host 0.0.0.0:8000 93 | networks: 94 | - gochronet 95 | 96 | networks: 97 | gochronet: 98 | driver: bridge 99 | ``` 100 | -------------------------------------------------------------------------------- /Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | vars: 4 | PROGRAM: gochro 5 | 6 | tasks: 7 | update: 8 | cmds: 9 | - go get -u 10 | - go mod tidy -v 11 | 12 | build: 13 | aliases: [default] 14 | cmds: 15 | - go fmt ./... 16 | - go vet ./... 17 | - go build -o {{.PROGRAM}} 18 | 19 | test: 20 | env: 21 | CGO_ENABLED: 1 # required by -race 22 | cmds: 23 | - go test -race -cover ./... 24 | 25 | run: 26 | cmds: 27 | - ./{{.PROGRAM}} -debug -testmode -config config.json 28 | 29 | lint: 30 | cmds: 31 | - golangci-lint run ./... 
--timeout=30m 32 | - go mod tidy 33 | 34 | lint-update: 35 | cmds: 36 | - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b {{ .GOPATH }}/bin 37 | - golangci-lint --version 38 | vars: 39 | GOPATH: 40 | sh: go env GOPATH 41 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/FireFart/gochro 2 | 3 | go 1.24 4 | 5 | require ( 6 | github.com/gorilla/handlers v1.5.2 7 | github.com/gorilla/mux v1.8.1 8 | github.com/sirupsen/logrus v1.9.3 9 | ) 10 | 11 | require ( 12 | github.com/felixge/httpsnoop v1.0.4 // indirect 13 | golang.org/x/sys v0.33.0 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= 5 | github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= 6 | github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE= 7 | github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w= 8 | github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= 9 | github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= 10 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 11 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 12 | github.com/sirupsen/logrus v1.9.3 
h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= 13 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 14 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 15 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 16 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 17 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 18 | golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= 19 | golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 20 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 21 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 22 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 23 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // shell switches: 4 | // https://source.chromium.org/chromium/chromium/src/+/main:headless/app/headless_shell_switches.cc 5 | 6 | import ( 7 | "bytes" 8 | "context" 9 | "flag" 10 | "fmt" 11 | "io" 12 | "math/rand" 13 | "net/http" 14 | "os" 15 | "os/exec" 16 | "os/signal" 17 | "path" 18 | "path/filepath" 19 | "runtime" 20 | "runtime/debug" 21 | "strconv" 22 | "syscall" 23 | "time" 24 | 25 | "github.com/gorilla/handlers" 26 | "github.com/gorilla/mux" 27 | log "github.com/sirupsen/logrus" 28 | ) 29 | 30 | const ( 31 | chromiumPath = "/usr/bin/chromium-browser" 32 | defaultGracefulTimeout = 5 * time.Second 33 | ) 34 | 35 | var ( 36 | debugOutput = false 37 | ignoreCertErrors = true 38 | proxyServer = "" 39 | disableSandbox = false 40 | ) 41 | 42 | type application 
struct{} 43 | 44 | var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 45 | 46 | func randStringRunes(n int) string { 47 | b := make([]rune, n) 48 | for i := range b { 49 | b[i] = letterRunes[rand.Intn(len(letterRunes))] 50 | } 51 | return string(b) 52 | } 53 | 54 | func main() { 55 | var host string 56 | var wait time.Duration 57 | flag.StringVar(&host, "host", "127.0.0.1:8080", "IP and Port to bind to") 58 | flag.BoolVar(&ignoreCertErrors, "ignore-cert-errors", true, "Ignore Certificate Errors when taking screenshots of fetching ressources") 59 | flag.BoolVar(&debugOutput, "debug", false, "Enable DEBUG mode") 60 | flag.BoolVar(&disableSandbox, "disable-sandbox", false, "Disable chromium sandbox") 61 | flag.StringVar(&proxyServer, "proxy", "", "Proxy Server to use for chromium. Please use format IP:PORT without a protocol.") 62 | flag.DurationVar(&wait, "graceful-timeout", defaultGracefulTimeout, "the duration for which the server gracefully wait for existing connections to finish - e.g. 
15s or 1m") 63 | flag.Parse() 64 | 65 | log.SetOutput(os.Stdout) 66 | log.SetLevel(log.InfoLevel) 67 | if debugOutput { 68 | log.SetLevel(log.DebugLevel) 69 | } 70 | 71 | app := &application{} 72 | 73 | srv := &http.Server{ 74 | Addr: host, 75 | Handler: app.routes(), 76 | } 77 | log.Infof("Starting server on %s", host) 78 | if debugOutput { 79 | log.Debug("DEBUG mode enabled") 80 | } 81 | 82 | // continuously print number of goroutines in debug mode 83 | if debugOutput { 84 | go func() { 85 | goRoutineTicker := time.NewTicker(3 * time.Second) 86 | defer goRoutineTicker.Stop() 87 | for range goRoutineTicker.C { 88 | log.Debugf("number of goroutines: %d", runtime.NumGoroutine()) 89 | } 90 | }() 91 | } 92 | 93 | go func() { 94 | if err := srv.ListenAndServe(); err != nil { 95 | log.Error(err) 96 | } 97 | }() 98 | 99 | c := make(chan os.Signal, 1) 100 | signal.Notify(c, syscall.SIGTERM, syscall.SIGINT) 101 | <-c 102 | ctx, cancel := context.WithTimeout(context.Background(), wait) 103 | defer cancel() 104 | if err := srv.Shutdown(ctx); err != nil { 105 | log.Error(err) 106 | } 107 | log.Info("shutting down") 108 | os.Exit(0) 109 | } 110 | 111 | func (app *application) routes() http.Handler { 112 | r := mux.NewRouter() 113 | r.Use(app.loggingMiddleware) 114 | r.Use(app.recoverPanic) 115 | r.HandleFunc("/screenshot", app.errorHandler(app.screenshot)) 116 | r.HandleFunc("/html2pdf", app.errorHandler(app.html2pdf)) 117 | r.HandleFunc("/url2pdf", app.errorHandler(app.url2pdf)) 118 | r.HandleFunc("/html", app.errorHandler(app.html)) 119 | r.PathPrefix("/").HandlerFunc(app.catchAllHandler) 120 | return r 121 | } 122 | 123 | func (app *application) catchAllHandler(w http.ResponseWriter, r *http.Request) { 124 | w.Header().Set("Connection", "close") 125 | w.WriteHeader(http.StatusNotFound) 126 | if _, err := w.Write([]byte("Not found")); err != nil { 127 | log.Error(err) 128 | } 129 | } 130 | 131 | func (app *application) loggingMiddleware(next http.Handler) http.Handler { 132 
| return handlers.CombinedLoggingHandler(os.Stdout, next) 133 | } 134 | 135 | func (app *application) toImage(ctx context.Context, url string, w, h *int, userAgent *string) ([]byte, error) { 136 | return app.execChrome(ctx, "screenshot", url, w, h, userAgent) 137 | } 138 | 139 | func (app *application) toPDF(ctx context.Context, url string, w, h *int, userAgent *string) ([]byte, error) { 140 | return app.execChrome(ctx, "pdf", url, w, h, userAgent) 141 | } 142 | 143 | func (app *application) toHTML(ctx context.Context, url string, w, h *int, userAgent *string) ([]byte, error) { 144 | return app.execChrome(ctx, "html", url, w, h, userAgent) 145 | } 146 | 147 | func (app *application) execChrome(ctxMain context.Context, action, url string, w, h *int, userAgent *string) ([]byte, error) { 148 | args := []string{ 149 | "--headless=new", // https://developer.chrome.com/articles/new-headless/ 150 | "--disable-gpu", 151 | "--disable-software-rasterizer", 152 | "--virtual-time-budget=55000", // 55 secs, context timeout is 1 minute 153 | "--disable-dev-shm-usage", 154 | "--hide-scrollbars", 155 | "--disable-crash-reporter", 156 | "--block-new-web-contents", 157 | } 158 | 159 | if w != nil && *w > 0 && h != nil && *h > 0 { 160 | args = append(args, fmt.Sprintf("--window-size=%d,%d", *w, *h)) 161 | } 162 | 163 | if debugOutput { 164 | args = append(args, "--enable-logging") 165 | args = append(args, "--v=1") 166 | } 167 | 168 | if ignoreCertErrors { 169 | args = append(args, "--ignore-certificate-errors") 170 | } 171 | 172 | if disableSandbox { 173 | args = append(args, "--no-sandbox") 174 | } 175 | 176 | if proxyServer != "" { 177 | args = append(args, fmt.Sprintf("--proxy-server=%s", proxyServer)) 178 | } 179 | 180 | if userAgent != nil && len(*userAgent) > 0 { 181 | args = append(args, fmt.Sprintf("--user-agent=%s", *userAgent)) 182 | } 183 | 184 | switch action { 185 | case "screenshot": 186 | args = append(args, "--screenshot") 187 | case "pdf": 188 | args = append(args, 
"--print-to-pdf", "--no-pdf-header-footer") 189 | case "html": 190 | args = append(args, "--dump-dom") 191 | default: 192 | return nil, fmt.Errorf("unknown action %q", action) 193 | } 194 | 195 | // last parameter is the url 196 | args = append(args, url) 197 | 198 | tmpdir := path.Join(os.TempDir(), fmt.Sprintf("chrome_%s", randStringRunes(10))) // nolint:gomnd 199 | err := os.Mkdir(tmpdir, os.ModePerm) 200 | if err != nil { 201 | return nil, fmt.Errorf("could not create dir %q: %w", tmpdir, err) 202 | } 203 | defer os.RemoveAll(tmpdir) 204 | 205 | ctx, cancel := context.WithTimeout(ctxMain, 1*time.Minute) 206 | defer cancel() 207 | 208 | log.Debugf("going to call chromium with the following args: %v", args) 209 | 210 | var out bytes.Buffer 211 | var stderr bytes.Buffer 212 | cmd := exec.CommandContext(ctx, chromiumPath, args...) 213 | cmd.Dir = tmpdir 214 | cmd.Stdout = &out 215 | cmd.Stderr = &stderr 216 | err = cmd.Run() 217 | if err != nil { 218 | killChromeProcessIfRunning(cmd) 219 | return nil, fmt.Errorf("could not execute command %w: %s", err, stderr.String()) 220 | } 221 | 222 | log.Debugf("STDOUT: %s", out.String()) 223 | log.Debugf("STDERR: %s", stderr.String()) 224 | 225 | var content []byte 226 | 227 | switch action { 228 | case "screenshot": 229 | outfile := path.Join(tmpdir, "screenshot.png") 230 | content, err = os.ReadFile(outfile) 231 | if err != nil { 232 | return nil, fmt.Errorf("could not read temp file: %w", err) 233 | } 234 | case "pdf": 235 | outfile := path.Join(tmpdir, "output.pdf") 236 | content, err = os.ReadFile(outfile) 237 | if err != nil { 238 | return nil, fmt.Errorf("could not read temp file: %w", err) 239 | } 240 | case "html": 241 | content = out.Bytes() 242 | default: 243 | return nil, fmt.Errorf("unknown action %q", action) 244 | } 245 | 246 | killChromeProcessIfRunning(cmd) 247 | 248 | return content, nil 249 | } 250 | 251 | func killChromeProcessIfRunning(cmd *exec.Cmd) { 252 | if cmd.Process == nil { 253 | return 254 | } 
255 | if err := cmd.Process.Release(); err != nil { 256 | log.Error(err) 257 | return 258 | } 259 | if err := cmd.Process.Kill(); err != nil { 260 | log.Error(err) 261 | return 262 | } 263 | } 264 | 265 | func (app *application) logError(w http.ResponseWriter, err error, withTrace bool) { 266 | w.Header().Set("Connection", "close") 267 | errorText := fmt.Sprintf("%v", err) 268 | log.Error(errorText) 269 | if withTrace { 270 | log.Errorf("%s", debug.Stack()) 271 | } 272 | http.Error(w, "There was an error processing your request", http.StatusInternalServerError) 273 | } 274 | 275 | func (app *application) recoverPanic(next http.Handler) http.Handler { 276 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 277 | defer func() { 278 | if err := recover(); err != nil { 279 | app.logError(w, fmt.Errorf("%s", err), true) 280 | } 281 | }() 282 | next.ServeHTTP(w, r) 283 | }) 284 | } 285 | 286 | func (app *application) errorHandler(h func(*http.Request) (string, []byte, error)) http.HandlerFunc { 287 | return func(w http.ResponseWriter, r *http.Request) { 288 | content, b, err := h(r) 289 | if err != nil { 290 | app.logError(w, err, false) 291 | return 292 | } 293 | w.Header().Set("Content-Type", content) 294 | _, err = w.Write(b) 295 | if err != nil { 296 | app.logError(w, err, false) 297 | return 298 | } 299 | } 300 | } 301 | 302 | func getStringParameter(r *http.Request, paramname string) *string { 303 | p, ok := r.URL.Query()[paramname] 304 | if !ok || len(p[0]) < 1 { 305 | return nil 306 | } 307 | ret := p[0] 308 | return &ret 309 | } 310 | 311 | func getIntParameter(r *http.Request, paramname string) (*int, error) { 312 | p, ok := r.URL.Query()[paramname] 313 | if !ok || len(p[0]) < 1 { 314 | return nil, nil 315 | } 316 | 317 | i, err := strconv.Atoi(p[0]) 318 | if err != nil { 319 | return nil, fmt.Errorf("invalid parameter %s=%q - %w", paramname, p[0], err) 320 | } else if i < 1 { 321 | return nil, fmt.Errorf("invalid parameter %s: %q", 
paramname, p[0]) 322 | } 323 | 324 | return &i, nil 325 | } 326 | 327 | // http://localhost:8080/screenshot?url=https://firefart.at&w=1024&h=768 328 | func (app *application) screenshot(r *http.Request) (string, []byte, error) { 329 | url := getStringParameter(r, "url") 330 | if url == nil { 331 | return "", nil, fmt.Errorf("missing required parameter url") 332 | } 333 | 334 | // optional parameters start here 335 | w, err := getIntParameter(r, "w") 336 | if err != nil { 337 | return "", nil, err 338 | } 339 | 340 | h, err := getIntParameter(r, "h") 341 | if err != nil { 342 | return "", nil, err 343 | } 344 | 345 | userAgentParam := getStringParameter(r, "useragent") 346 | 347 | content, err := app.toImage(r.Context(), *url, w, h, userAgentParam) 348 | if err != nil { 349 | return "", nil, err 350 | } 351 | 352 | return "image/png", content, nil 353 | } 354 | 355 | // http://localhost:8080/html2pdf?w=1024&h=768 356 | func (app *application) html2pdf(r *http.Request) (string, []byte, error) { 357 | // optional parameters start here 358 | w, err := getIntParameter(r, "w") 359 | if err != nil { 360 | return "", nil, err 361 | } 362 | 363 | h, err := getIntParameter(r, "h") 364 | if err != nil { 365 | return "", nil, err 366 | } 367 | 368 | userAgentParam := getStringParameter(r, "useragent") 369 | 370 | tmpf, err := os.CreateTemp("", "pdf.*.html") 371 | if err != nil { 372 | return "", nil, fmt.Errorf("could not create tmp file: %w", err) 373 | } 374 | defer os.Remove(tmpf.Name()) 375 | 376 | bytes, err := io.Copy(tmpf, r.Body) 377 | if err != nil { 378 | return "", nil, fmt.Errorf("could not copy request: %w", err) 379 | } 380 | if bytes <= 0 { 381 | return "", nil, fmt.Errorf("please provide a valid post body") 382 | } 383 | 384 | err = tmpf.Close() 385 | if err != nil { 386 | return "", nil, fmt.Errorf("could not close tmp file: %w", err) 387 | } 388 | 389 | path, err := filepath.Abs(tmpf.Name()) 390 | if err != nil { 391 | return "", nil, fmt.Errorf("could not 
get temp file path: %w", err) 392 | } 393 | 394 | content, err := app.toPDF(r.Context(), path, w, h, userAgentParam) 395 | if err != nil { 396 | return "", nil, err 397 | } 398 | 399 | return "application/pdf", content, nil 400 | } 401 | 402 | // http://localhost:8080/url2pdf?w=1024&h=768&url=https://firefart.at 403 | func (app *application) url2pdf(r *http.Request) (string, []byte, error) { 404 | url := getStringParameter(r, "url") 405 | if url == nil { 406 | return "", nil, fmt.Errorf("missing required parameter url") 407 | } 408 | 409 | // optional parameters start here 410 | w, err := getIntParameter(r, "w") 411 | if err != nil { 412 | return "", nil, err 413 | } 414 | 415 | h, err := getIntParameter(r, "h") 416 | if err != nil { 417 | return "", nil, err 418 | } 419 | 420 | userAgentParam := getStringParameter(r, "useragent") 421 | 422 | content, err := app.toPDF(r.Context(), *url, w, h, userAgentParam) 423 | if err != nil { 424 | return "", nil, err 425 | } 426 | 427 | return "application/pdf", content, nil 428 | } 429 | 430 | // http://localhost:8080/html?url=https://firefart.at&w=1024&h=768 431 | func (app *application) html(r *http.Request) (string, []byte, error) { 432 | url := getStringParameter(r, "url") 433 | if url == nil { 434 | return "", nil, fmt.Errorf("missing required parameter url") 435 | } 436 | 437 | // optional parameters start here 438 | w, err := getIntParameter(r, "w") 439 | if err != nil { 440 | return "", nil, err 441 | } 442 | 443 | h, err := getIntParameter(r, "h") 444 | if err != nil { 445 | return "", nil, err 446 | } 447 | 448 | userAgentParam := getStringParameter(r, "useragent") 449 | 450 | content, err := app.toHTML(r.Context(), *url, w, h, userAgentParam) 451 | if err != nil { 452 | return "", nil, err 453 | } 454 | 455 | return "text/plain", content, nil 456 | } 457 | --------------------------------------------------------------------------------