├── .dockerignore
├── .gitignore
├── Dockerfile
├── Dockerfile-arm64
├── Makefile
├── README.md
├── docker-compose.yml.sample
├── docs
├── README.md
└── assets
│ ├── architecture.jpg
│ └── system-architecture.drawio
├── entrypoint.sh
├── env.build
├── env.sample
├── gunicorn_settings.py
├── requirements.txt
└── src
├── config
├── __init__.py
├── asgi.py
├── settings.py
├── urls.py
└── wsgi.py
├── firefox-extensions
└── i_dont_care_about_cookies-3.1.3-an+fx.xpi
├── manage.py
├── media
└── cache
│ └── .gitignore
├── shots
├── __init__.py
├── admin.py
├── apps.py
├── management
│ ├── __init__.py
│ └── commands
│ │ ├── __init__.py
│ │ ├── cleanup_7day_old.py
│ │ ├── screenshot_worker.py
│ │ └── screenshot_worker_ff.py
├── migrations
│ ├── 0001_initial.py
│ ├── 0002_screenshot_is_fullpage.py
│ ├── 0003_auto_20200127_2029.py
│ ├── 0004_screenshot_keywords.py
│ ├── 0005_screenshot_raw_html.py
│ ├── 0006_auto_20200204_1043.py
│ ├── 0007_screenshot_duration.py
│ ├── 0008_auto_20200207_1203.py
│ ├── 0009_auto_20200209_1605.py
│ ├── 0010_auto_20200209_2152.py
│ ├── 0011_remove_screenshot_raw_html.py
│ ├── 0012_remove_screenshot_image.py
│ ├── 0013_screenshot_image_binary.py
│ ├── 0014_auto_20200211_1242.py
│ ├── 0015_auto_20200213_1418.py
│ ├── 0016_auto_20200228_1053.py
│ ├── 0017_screenshot_sleep_seconds.py
│ ├── 0018_screenshot_dpi.py
│ ├── 0019_auto_20200315_1818.py
│ ├── 0020_auto_20200315_2020.py
│ ├── 0021_screenshot_file.py
│ ├── 0022_remove_screenshot_image_binary.py
│ └── __init__.py
├── models.py
├── templates
│ ├── about.html
│ ├── base.html
│ ├── index.html
│ ├── screenshot_get.html
│ └── static
│ │ ├── normalize.css
│ │ └── picnic.css
├── templatetags
│ ├── __init__.py
│ └── thumbnail_url.py
├── tests.py
├── validators.py
└── views.py
└── static
├── css
├── home.css
├── site.css
└── tacit.css
└── js
├── intercooler-1.2.3.min.js
├── jquery-3.4.1.min.js
└── site.js
/.dockerignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | src/staticfiles/
3 | src/api/staticfiles/
4 | data/
5 | __pycache__/
6 | venv/
7 | *.7z
8 | *.zip
9 | playground/*
10 | *.log
11 | *.sqlite3
12 | src/data/*
13 |
14 | .hypothesis/
15 | .mypy_cache/
16 |
17 | *.sql
18 |
19 | _images/
20 | docs/
21 |
22 | src/media/*/**.jpg
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
3 | **/.sass-cache/
4 | src/staticfiles/
5 | src/locks
6 | src/media/*/**.jpg
7 |
8 | *.sqlite3
9 | *.log
10 |
11 | src/api/staticfiles/
12 | data/
13 |
14 | env
15 | env.local
16 | env.bash
17 | env.docker
18 |
19 | __pycache__/
20 | venv/
21 |
22 | .mypy_cache/
23 |
24 | .idea/
25 | .vscode/
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
#FROM python:3.6-slim
FROM ubuntu:18.04

# System packages: git and pip for the Python requirements, plus the
# firefox-geckodriver package that the Firefox screenshot worker drives.
# DEBIAN_FRONTEND=noninteractive and `autoremove -y` keep apt from prompting
# (a prompt cannot be answered during a build and aborts it); the package
# lists are only needed while installing, so they are removed in the same layer.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y git python3-pip firefox-geckodriver && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the source so this layer is
# reused when only application code changes.
COPY requirements.txt /
RUN pip3 install --no-cache-dir --upgrade pip
RUN pip3 install --no-cache-dir -r /requirements.txt
RUN mkdir /app
COPY src/ /app/

# The entrypoint and the build step below call `python`, which Ubuntu 18.04
# does not provide by default.
RUN ln -s /usr/bin/python3 /usr/bin/python

# Release identifier passed with --build-arg and exposed to the running app.
ARG RELEASE
ENV RELEASE=${RELEASE}

ENV PYTHONUNBUFFERED=1

WORKDIR /app
VOLUME ["/images"]

# env.build supplies throwaway settings (DEV_ENV=build) so collectstatic can
# run at build time.
# NOTE(review): the file is deleted afterwards but still exists in the earlier
# COPY layer, so it must never contain real secrets.
COPY env.build /env.build
RUN ( set -a; . /env.build; set +a; python manage.py collectstatic --noinput)
RUN rm /env.build

COPY gunicorn_settings.py /gunicorn_settings.py

COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

EXPOSE 8000

CMD ["gunicorn", "-c", "/gunicorn_settings.py", "wsgi:application"]
37 |
--------------------------------------------------------------------------------
/Dockerfile-arm64:
--------------------------------------------------------------------------------
#FROM python:3.6-slim
FROM arm64v8/ubuntu:18.04

# System packages: git and pip for the Python requirements, the
# firefox-geckodriver package that the Firefox screenshot worker drives, and
# the PostgreSQL client headers/tools (libpq-dev, postgresql-common).
# DEBIAN_FRONTEND=noninteractive and `autoremove -y` keep apt from prompting
# (a prompt cannot be answered during a build and aborts it); the package
# lists are only needed while installing, so they are removed in the same layer.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y git python3-pip firefox-geckodriver libpq-dev \
    postgresql-common && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the source so this layer is
# reused when only application code changes.
COPY requirements.txt /
RUN pip3 install --no-cache-dir --upgrade pip
RUN pip3 install --no-cache-dir -r /requirements.txt
RUN mkdir /app
COPY src/ /app/

# The entrypoint and the build step below call `python`, which Ubuntu 18.04
# does not provide by default.
RUN ln -s /usr/bin/python3 /usr/bin/python

# Release identifier passed with --build-arg and exposed to the running app.
ARG RELEASE
ENV RELEASE=${RELEASE}

ENV PYTHONUNBUFFERED=1

WORKDIR /app
VOLUME ["/images"]

# env.build supplies throwaway settings (DEV_ENV=build) so collectstatic can
# run at build time.
# NOTE(review): the file is deleted afterwards but still exists in the earlier
# COPY layer, so it must never contain real secrets.
COPY env.build /env.build
RUN ( set -a; . /env.build; set +a; python manage.py collectstatic --noinput)
RUN rm /env.build

COPY gunicorn_settings.py /gunicorn_settings.py

COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

EXPOSE 8000

CMD ["gunicorn", "-c", "/gunicorn_settings.py", "wsgi:application"]
38 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Timestamp (YYYYmmddHHMM) used as the version tag of every image built here.
TAG:=$(shell date "+%Y%m%d%H%M")

###############################################################################
# HELP / DEFAULT COMMAND
###############################################################################
# Lists every target that carries a trailing "## description" comment.
.PHONY: help
help:
	@awk 'BEGIN {FS = ":.*?## "} /^[0-9a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)


.PHONY: build
build: ## Build the screenshots services
	docker build -t screenshots -t screenshots:$(TAG) .

.PHONY: build-prod
build-prod: ## Build the screenshots services remotely
	docker -H screenshots build -t screenshots -t screenshots:$(TAG) .

.PHONY: prod-deploy
prod-deploy: ## deploy to production
	ssh screenshots "cd deployment/screenshots && docker-compose up -d"

# -T: ssh runs this command without a terminal, and docker-compose exec
# otherwise tries to allocate a TTY and refuses to run.
.PHONY: prod-migrate
prod-migrate: ## run production migrations
	ssh root@screenshots "cd /root/deployment/screenshots && docker-compose exec -T web ./manage.py migrate"
26 |
27 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Purpose
2 | The purpose of this project is to explore and experiment with what it takes to
3 | make a website screen-shotting tool. At first it may seem like an easy task,
4 | but it becomes complex once you try.
5 |
6 | **NOTE**: If you just want a tool that "just works" then I suggest you try any of the
7 | capable services linked below.
8 |
9 | # Common problems
10 |
11 | * JavaScript-heavy pages (almost all these days); many sites use JavaScript to
12 | load content after the page has downloaded into the browser. Therefore you
13 | need to have a modern JavaScript engine to parse and execute those extra
14 | instructions to get the content as it was intended to be seen by humans.
15 | * Geography-restricted content; some sites in the US have blocked visitors
16 | from Europe because of GDPR. Do you accept this, or is there a way to work
17 | around it?
18 | * Bot and automation detection schemes; some sites use services to prevent
19 | automated processes from collecting content. This includes taking screenshots.
20 | * Improperly configured domain names, SSL/TLS encryption certificates, and other
21 | network-related issues
22 | * Nefarious website owners and hacked sites that attempt to exploit the web browser
23 | to mine crypto-currencies. This puts an added load on your resources and can
24 | significantly slow your render-times.
25 | * Taking too many screenshots at a time may overload the server and cause timeouts or
26 | failure to load pages.
27 | * Temporary network or website failure; If the problem is on the site's end, then how
28 | will we know that and schedule another attempt later?
29 | * People using the service as a de facto proxy (e.g., pranksters downloading porn at their
30 | schools or in public places)
31 |
32 | ## Requirements
33 |
34 | My development environment is on macOS, so Homebrew and PyCharm are my friends here.
35 |
36 | * python 3.x stable in Virtual Environment (this is the only version I'm working with)
37 | * Selenium/geckodriver/chrome-driver installed via homebrew `brew install geckodriver`
38 | * Docker
39 | * Postgres installed via Homebrew.
40 |
41 | I don't use Docker on my development machine because I have not figured out how to get PyCharm's awesome debugger
42 | working well inside Docker containers. If you have, ping me.
43 |
44 | ## Getting started
45 |
46 | 1. Check out the repo
47 | 1. Install a local virtual environment `python -m venv venv/`
48 | 1. Jump into venv/ with `source venv/bin/activate`
49 | 1. Install requirements `pip install -r requirements.txt`
50 | 1. Create the postgres database for the project `CREATE DATABASE screenshots`
51 | 1. copy the `env.sample` to `env` in the root source folder
52 | 1. Check / update values in the `env` file if needed
53 | 1. Install Selenium geckodriver for your platform `brew install geckodriver`
54 | 1. Migrate the database `cd src && ./manage.py migrate`
55 | 1. Create the cache table `cd src && ./manage.py createcachetable`
56 | 1. Create the superuser `cd src && ./manage.py createsuperuser`
57 | 1. Start the worker `cd src && ./manage.py screenshot_worker_ff`
58 | 1. Finally, start the webserver `cd src && ./manage.py runserver 0.0.0.0:8000`
59 |
60 | Open a browser onto http://localhost:8000 and see the screenshot app in all its glory.
61 |
62 | ## System Architecture
63 |
64 | ![system architecture][systemarch]
65 |
66 | ### Web process
67 | Django runs as usual in either development mode or inside gunicorn (for production).
68 |
69 | ### Worker Processes
70 | There is a worker (or a number of workers) that run as parallel, independent processes to the webserver process.
71 | They connect to the database and poll for new work on an interval. This pattern obviates the need for Celery, Redis,
72 | RabbitMQ, or other complicated moving parts in the system.
73 |
74 | The worker processes work like this:
75 |
76 | 1. poll database for new screenshots to make
77 | 1. find a screenshot, mark it as pending
78 | 1. launch Selenium and take a screenshot of the resulting page (up to 60 seconds time limit)
79 | 1. save screenshot to database
80 | 1. shutdown selenium browser
81 | 1. sleep
82 | 1. repeat
83 |
84 | ### But where are images stored?
85 | In the database! Now, before you lose it -- I know what many of you will say about storing images in the database. I
86 | have linked to the relevant Stack Overflow discussions here:
87 |
88 | * https://stackoverflow.com/questions/3748/storing-images-in-db-yea-or-nay
89 | * https://stackoverflow.com/questions/54500/storing-images-in-postgresql
90 |
91 | My rationale is this:
92 |
93 | * All content lives in the database, so there are no syncing issues between the data (screenshots) and the
94 | metadata (database).
95 | * Images will be smallish because they are compressed screenshots of no more than 1 MB (often far less). But we will
96 | need to run many iterations and save as much metadata about the screens as possible to really know.
97 | * Thumbnails will be stored in cache (also a database table), but get purged after 30 days.
98 | * Today's compute, network, and storage capacities are so big that 1TB is no longer considered unreasonable. This means
99 | that if we build up a screenshot database of 1TB, then that is a good problem to have and we can re-architect from there.
100 |
101 | Note: This is a hypothesis, and I am willing to change my mind if this does not work out.
102 |
103 | ## Recommended reading on the subject
104 |
105 | * https://medium.com/@eknkc/how-to-run-headless-chrome-in-scale-7a3c7c83b28f
106 | * https://medium.com/@timotheejeannin/i-built-a-screenshot-api-and-some-guy-was-mining-cryptocurrencies-with-it-cd188dfae773
107 |
108 | ## Alternative Services
109 |
110 | * http://url2png.com
111 | * https://apiflash.com/
112 |
113 | ## Thank-yous
114 |
115 | * Philip Walton - [Simple sticky footers using flexbox](https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/)
116 |
117 | ## Contributing
118 |
119 | Please fork and submit pull requests if you are inspired to do so. Issues are open as well.
120 |
121 |
122 | [systemarch]: https://raw.githubusercontent.com/undernewmanagement/screenshots/master/docs/assets/architecture.jpg "Diagram of system architecture"
123 |
124 |
--------------------------------------------------------------------------------
/docker-compose.yml.sample:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | services:
4 |
5 | web:
6 | image: screenshots
7 | restart: always
8 | env_file: env
9 | networks:
10 | - web
11 | labels:
12 | - traefik.enable=true
13 | - traefik.http.routers.screenshot.rule=Host(`yourdomain.com`)
14 | - traefik.http.routers.screenshot.tls=true
15 | - traefik.http.routers.screenshot.tls.certresolver=le
16 | - traefik.http.services.screenshot.loadbalancer.server.port=8000
17 |
18 | - traefik.http.middlewares.screenshot.compress=true
19 |
20 | - traefik.http.middlewares.
21 | - traefik.http.routers.screenshot.middlewares=screenshot@docker
22 |
23 | worker_ff:
24 | image: screenshots
25 | command: ./manage.py screenshot_worker_ff
26 | restart: always
27 | env_file: env
28 | volumes:
29 | - /dev/shm:/dev/shm
30 | - ./geckodriver.log:/app/geckodriver.log
31 | networks:
32 | - web
33 |
34 | networks:
35 | web:
36 | external: true
37 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glovebx/screenshots/6d328fdbbfe9076a70dedd3117005d1a5f1d9438/docs/README.md
--------------------------------------------------------------------------------
/docs/assets/architecture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glovebx/screenshots/6d328fdbbfe9076a70dedd3117005d1a5f1d9438/docs/assets/architecture.jpg
--------------------------------------------------------------------------------
/docs/assets/system-architecture.drawio:
--------------------------------------------------------------------------------
1 | 1VrbctowEP0aHsv4CuSxgSS0k0zTIdMkfRO2YisRliuLW7++EpZ8k4NpgBheEmm1K0tHe3bXMh17OFvdUBCHd8SHuGMZ/qpjjzqWZdq2wf8JyTqVDEwnFQQU+VIpF0zQXyiF0i6YIx8mJUVGCGYoLgs9EkXQYyUZoJQsy2ovBJefGoMAaoKJB7AufUQ+C+UuXCOXjyEKQvVk05AjM6CUpSAJgU+WBZF91bGHlBCWtmarIcQCPIVLanf9zmi2MAojtovB03c4Hq/vjGHkEPDn98us/zT+Yl2k0ywAnssdy9WytYKAknnkQzGL0bEvlyFicBIDT4wu+aFzWchmmPdM3vRBEm50RUdfolz1AlIGVwWRXPINJDPI6JqryFGrL+GT/qOAX+aH4SpZWDgI90IKgXSAIJs6x4g3JEz/AZnyzJOFzDbLkJm2jplt1WDmuMfCzOppEEGf80x2CWUhCUgE8FUuvSyDmOvcEhJLtF4hY2sZNMCckTKwcIXYkzDv9l3ZfS4MjVZy6k1nrToR33Bq5apuZiU6udmmp+wSBij7KiIOF3gYJAnylPgaYdx0vgmZUw9uw1BGP0ADyLboyfgq8N3qLRRiwNCiHOcOf/I1ZOlhJtweLXgzEM17krCAwsnPWzXIn1UY15yngVEgidNc8IJWwoEOwSqnEogcR2fVoIZUg2NxytGQnTBCRTqrosUzTyya3hojzijaHJKmKfVup5kAeG/BhpA/5ozPAg8XrnqDZmB7nwnsoDm+Cyji3feelSdgqmYwtmJSdbYspBcwySqNIiiZ8OCouM2o7M/KrefR7FFtUbF3Btio0Upt0HIQ62vIPRL6BmlysgBaZQCtlgE0TQ1BlUOnKn2OiMchLaTW6buJtd3C1TK6hmG5ZQ+tQfjC6ro6xtzaORLKNYT+xPpVtZ/zqrS5fM3q1YJVm+WrenlvKl8l0ocrXzemfGdgXVCICYpYUpj5XggKRUklTqr3zusd9Z2BUXG6dAW5C2Zb2YP7GvVHryAKyBFi50fKZZ3LPN1woWMN3L65+eu0Gzwz523ptbT4VmrsRmvjxGitonMjr81D83q/k9cvcR4ogC/o7RTIYzs15Emz4wmRxz73nNhvnT39tpLiXidv6+TRas5fd2dRb5aT9u6V5rGQdc6OU6eWkOydC03jtEj1/otc4SZ0/PBw/94daa3yJGMmrWo23LG2Ss3q/VfNJwyz7hNG72jMbDXbcV6VasXuRQM7eeceUsT3Lq57t37YMFpnrLUjY+3TqiHVuguM/RZxvCO+Cc1XMEZxInwiu4zHZO63kPOq9387EusDZSTv5l+303fe/DcC9tU/
--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Container entrypoint: optionally runs Django housekeeping, then replaces
# itself with the command passed as arguments.
#
# Environment:
#   DO_MIGRATION=y   run `manage.py migrate` first        (default: n)
#   DO_STATIC=y      run `manage.py collectstatic` first  (default: n)

set -e

DO_MIGRATION=${DO_MIGRATION:-n}
DO_STATIC=${DO_STATIC:-n}

if [[ $DO_MIGRATION = y ]]; then
    python /app/manage.py migrate
fi

if [[ $DO_STATIC = y ]]; then
    # --noinput: there is no terminal to answer collectstatic's confirmation
    # prompt, matching how the Dockerfile invokes it at build time.
    python /app/manage.py collectstatic --noinput
fi

echo "[ * ] Starting app"
# exec "$@" hands over every argument exactly as received. Flattening the
# arguments into one string and re-splitting it would break any argument that
# contains spaces or glob characters.
exec "$@"
20 |
--------------------------------------------------------------------------------
/env.build:
--------------------------------------------------------------------------------
1 | SECRET_KEY=lllllllllllllllllllllllllllllllllllllllllllllllllll
2 | DATABASE_URL=postgres://postgres@localhost/screenshot
3 | EMAIL_URL=smtp://localhost:1025/
4 | SENTRY_DSN=
5 | DEV_ENV=build
6 |
--------------------------------------------------------------------------------
/env.sample:
--------------------------------------------------------------------------------
1 | SECRET_KEY=your-crazy-long-secret-key-should-go-here-no-questions
2 | SENTRY_DSN=
3 | DATABASE_URL=postgres://postgres@127.0.0.1/screenshots
4 | SMTP_URL=smtp://mailhog:1025
5 | DEV_ENV=dev
6 | DEBUG=y
7 |
8 | SOCKS5_PROXY_ENABLED=n
9 | SOCKS5_PROXY_HOSTNAME=
10 | SOCKS5_PROXY_PORT=
11 |
12 | S3_BUCKET_PREFIX=
13 | S3_REGION_NAME=
14 | S3_ENDPOINT_URL=
15 | AWS_ACCESS_KEY_ID=
16 | AWS_SECRET_ACCESS_KEY=
--------------------------------------------------------------------------------
/gunicorn_settings.py:
--------------------------------------------------------------------------------
# Gunicorn configuration, loaded through `gunicorn -c /gunicorn_settings.py`.

# Listen on all interfaces; the Dockerfile exposes port 8000.
bind = '0.0.0.0:8000'

# Number of worker processes.
workers = 2

# Directory containing wsgi.py, so the `wsgi:application` target in the image's
# CMD can be imported.
pythonpath = "/app/config"

# Accept forwarded headers from any address.
# NOTE(review): appropriate only while gunicorn is reachable solely through the
# reverse proxy - confirm against the deployment.
forwarded_allow_ips = "*"
5 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | asgiref==3.2.3
2 | boto3==1.12.25
3 | botocore==1.15.25
4 | certifi==2019.11.28
5 | chardet==3.0.4
6 | dj-database-url==0.5.0
7 | dj-email-url==0.2.0
8 | Django==3.0.3
9 | django-csp==3.6
10 | django-storages==1.9.1
11 | docutils==0.15.2
12 | gunicorn==20.0.4
13 | idna==2.9
14 | jmespath==0.9.5
15 | Pillow==7.0.0
16 | psycopg2-binary==2.8.4
17 | python-dateutil==2.8.1
18 | python-dotenv==0.10.5
19 | python-magic==0.4.15
20 | pytz==2019.3
21 | requests==2.23.0
22 | s3transfer==0.3.3
23 | selenium==3.141.0
24 | sentry-sdk==0.14.2
25 | six==1.14.0
26 | sorl-thumbnail==12.6.3
27 | sqlparse==0.3.0
28 | urllib3==1.25.8
29 | whitenoise==5.0.1
30 |
--------------------------------------------------------------------------------
/src/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glovebx/screenshots/6d328fdbbfe9076a70dedd3117005d1a5f1d9438/src/config/__init__.py
--------------------------------------------------------------------------------
/src/config/asgi.py:
--------------------------------------------------------------------------------
"""
ASGI entry point for the ``config`` project.

The ASGI callable is published as the module-level name ``application``.

Background reading:
https://docs.djangoproject.com/en/3.0/howto/deployment/asgi/
"""

import os

from django.core.asgi import get_asgi_application

# Keep a DJANGO_SETTINGS_MODULE that the environment already provides;
# otherwise fall back to the project's settings module.
if 'DJANGO_SETTINGS_MODULE' not in os.environ:
    os.environ['DJANGO_SETTINGS_MODULE'] = 'config.settings'

application = get_asgi_application()
17 |
--------------------------------------------------------------------------------
/src/config/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sentry_sdk
3 | from sentry_sdk.integrations.django import DjangoIntegration
4 | import dj_database_url
5 | import dj_email_url
6 | from dotenv import load_dotenv, find_dotenv
7 |
8 |
9 | load_dotenv(find_dotenv(os.getenv('ENV_FILE', 'env')))
10 |
11 | SECRET_KEY = os.environ['SECRET_KEY']
12 |
13 | DEBUG = os.getenv('DEBUG') in ['true', 'y', 't']
14 |
15 | SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https')
16 |
17 | """
18 | SOCKS5 Configuration
19 | - used by the screenshot worker, generally to get around the error code 451
20 | GDPR bullshit
21 | """
22 | SOCKS5_PROXY_ENABLED = os.getenv('SOCKS5_PROXY_ENABLED') in ['true', 'y', 't']
23 |
24 | if SOCKS5_PROXY_ENABLED:
25 | SOCKS5_PROXY_HOSTNAME = os.getenv('SOCKS5_PROXY_HOSTNAME')
26 | SOCKS5_PROXY_PORT = int(os.getenv('SOCKS5_PROXY_PORT'))
27 |
28 | if os.environ['DEV_ENV'] in ['production', 'staging']:
29 | sentry_sdk.init(
30 | dsn=os.getenv('SENTRY_DSN'),
31 | integrations=[DjangoIntegration()],
32 |
33 | # If you wish to associate users to errors (assuming you are using
34 | # django.contrib.auth) you may enable sending PII data.
35 | send_default_pii=True
36 | )
37 |
38 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
39 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
40 |
41 | ALLOWED_HOSTS = ['*']
42 |
43 |
44 | # Application definition
45 |
46 | INSTALLED_APPS = [
47 | 'django.contrib.admin',
48 | 'django.contrib.auth',
49 | 'django.contrib.contenttypes',
50 | 'django.contrib.sessions',
51 | 'django.contrib.messages',
52 | 'django.contrib.staticfiles',
53 | 'sorl.thumbnail',
54 | 'shots',
55 | ]
56 |
57 | MIDDLEWARE = [
58 | 'django.middleware.security.SecurityMiddleware',
59 | 'whitenoise.middleware.WhiteNoiseMiddleware',
60 | 'django.contrib.sessions.middleware.SessionMiddleware',
61 | 'django.middleware.common.CommonMiddleware',
62 | 'django.middleware.csrf.CsrfViewMiddleware',
63 | 'csp.middleware.CSPMiddleware',
64 | 'django.contrib.auth.middleware.AuthenticationMiddleware',
65 | 'django.contrib.messages.middleware.MessageMiddleware',
66 | 'django.middleware.clickjacking.XFrameOptionsMiddleware',
67 | ]
68 |
69 | ROOT_URLCONF = 'config.urls'
70 |
71 | TEMPLATES = [
72 | {
73 | 'BACKEND': 'django.template.backends.django.DjangoTemplates',
74 | 'DIRS': [],
75 | 'APP_DIRS': True,
76 | 'OPTIONS': {
77 | 'context_processors': [
78 | 'django.template.context_processors.debug',
79 | 'django.template.context_processors.request',
80 | 'django.contrib.auth.context_processors.auth',
81 | 'django.contrib.messages.context_processors.messages',
82 | ],
83 | },
84 | },
85 | ]
86 |
87 | WSGI_APPLICATION = 'config.wsgi.application'
88 |
89 |
90 | """
91 | DATABASES = {
92 | 'default': {
93 | 'ENGINE': 'django.db.backends.sqlite3',
94 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
95 | }
96 | }
97 | """
98 |
# The Docker build sources env.build (DEV_ENV=build) only to run collectstatic,
# so the database is configured for every environment except that one.
# The comparison must be an equality test: `not in 'build'` was a substring
# check against the string 'build' (so '', 'b', 'ui', ... also skipped the
# database) and raised TypeError when DEV_ENV was not set at all.
if os.getenv('DEV_ENV') != 'build':
    DATABASES = {
        'default': dj_database_url.parse(os.environ['DATABASE_URL'], conn_max_age=600)
    }
103 |
104 | CACHES = {
105 | 'default': {
106 | 'BACKEND': 'django.core.cache.backends.db.DatabaseCache',
107 | 'LOCATION': 'cache',
108 | },
109 | 'page': {
110 | 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
111 | 'LOCATION': 'unique-snowflake',
112 | }
113 | }
114 |
115 |
116 |
117 | # Password validation
118 | # https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators
119 |
120 | AUTH_PASSWORD_VALIDATORS = [
121 | {
122 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
123 | },
124 | {
125 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
126 | },
127 | {
128 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
129 | },
130 | {
131 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
132 | },
133 | ]
134 |
135 |
136 | # Internationalization
137 | # https://docs.djangoproject.com/en/3.0/topics/i18n/
138 |
139 | LANGUAGE_CODE = 'en-us'
140 |
141 | TIME_ZONE = 'UTC'
142 |
143 | USE_I18N = True
144 |
145 | USE_L10N = True
146 |
147 | USE_TZ = True
148 |
149 | """
150 | EMAIL SETTINGS
151 | """
152 | DEFAULT_FROM_EMAIL = "no-reply@screenshot.m3b.net"
153 |
154 | if os.environ['DEV_ENV'] in ['test', 'build']:
155 | EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
156 | else:
157 | email_config = dj_email_url.parse(os.environ["SMTP_URL"])
158 |
159 | EMAIL_HOST = email_config['EMAIL_HOST']
160 | EMAIL_HOST_USER = email_config['EMAIL_HOST_USER']
161 | EMAIL_HOST_PASSWORD = email_config['EMAIL_HOST_PASSWORD']
162 | EMAIL_PORT = email_config['EMAIL_PORT']
163 | EMAIL_USE_TLS = email_config['EMAIL_USE_TLS']
164 |
165 | """
166 | STATIC ASSET HANDLING
167 | - WhiteNoise configuration for forever-cacheable files and compression support
168 | """
169 | STATIC_ROOT = os.path.join(BASE_DIR, 'staticfiles')
170 |
171 | STATIC_URL = '/static/'
172 | STATICFILES_DIRS = [
173 | os.path.join(BASE_DIR, "static"),
174 | ]
175 |
176 | MEDIA_ROOT = os.path.join(BASE_DIR, "media")
177 | MEDIA_URL = '/media/'
178 |
179 | """
180 | COOKIES & CSRF COOKIE POLICIES
181 |
182 | TODO: These are only enabled in production because the admin/ won't
183 | work without HTTPS enabled. And I'm too lazy to futz with HTTPS on localhost
184 | right now.
185 | """
186 | if not DEBUG:
187 | CSRF_COOKIE_HTTPONLY = True
188 | CSRF_COOKIE_SAMESITE = 'Strict'
189 | CSRF_COOKIE_SECURE = True
190 | CSRF_COOKIE_NAME = '__Host-csrftoken'
191 | SESSION_COOKIE_SAMESITE = 'Strict'
192 | SESSION_COOKIE_SECURE = True
193 |
194 | """
195 | S3 Settings
196 | """
197 | S3_BUCKET_PREFIX = os.getenv('S3_BUCKET_PREFIX')
198 | S3_REGION_NAME = os.getenv('S3_REGION_NAME')
199 | S3_ENDPOINT_URL = os.getenv('S3_ENDPOINT_URL')
200 | AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
201 | AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
202 |
203 | """
204 | sorl thumbnails
205 | """
206 | THUMBNAIL_DUMMY = True
207 |
"""
CONTENT-SECURITY-POLICY
- Refer to Mozilla Observatory when crafting your CSP: https://observatory.mozilla.org
"""
CSP_DEFAULT_SRC = ("'none'",)
CSP_SCRIPT_SRC = ("'self'", 'https://www.googletagmanager.com', 'https://www.google-analytics.com',)
CSP_STYLE_SRC = ("'self'",)
CSP_INCLUDE_NONCE_IN = ['script-src', 'style-src']
# S3_ENDPOINT_URL is read with os.getenv(), so it is None when the variable is
# unset and '' with the stock env.sample. Neither is a usable CSP source, so
# the endpoint is listed only when it has a value.
CSP_IMG_SRC = tuple(
    source
    for source in (
        "'self'",
        "data:",
        'https://www.google-analytics.com',
        S3_ENDPOINT_URL,
        "http://dummyimage.com",
    )
    if source
)
CSP_FRAME_ANCESTORS = ("'none'",)
CSP_BASE_URI = ("'none'",)
CSP_FORM_ACTION = ("'self'",)
220 |
221 | DEFAULT_FILE_STORAGE = 'storages.backends.s3boto3.S3Boto3Storage'
222 |
223 | AWS_STORAGE_BUCKET_NAME = S3_BUCKET_PREFIX
224 | AWS_DEFAULT_ACL = 'public-read'
225 | # AWS_S3_OBJECT_PARAMETERS = {}
226 | # AWS_LOCATION = ''
227 | AWS_S3_REGION_NAME = S3_REGION_NAME
228 | AWS_S3_ENDPOINT_URL = S3_ENDPOINT_URL
--------------------------------------------------------------------------------
/src/config/urls.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | from django.urls import path
3 | from shots import views
4 |
5 |
# Route table for the project: the Django admin, the HTML pages served by the
# `shots` app, and its two service endpoints.
urlpatterns = [
    # Django admin
    path('admin/', admin.site.urls),

    # HTML pages
    path('', views.index, name='home'),
    path('about', views.about, name='about'),
    path('screenshot/create', views.screenshot_create, name='screenshot_create'),
    path('screenshot/', views.screenshot_get, name='screenshot_get'),

    # Service endpoints
    path('api/screenshot', views.api_screenshot, name='api-screenshot'),
    path('health-check', views.health_check, name='health-check'),
]
16 |
--------------------------------------------------------------------------------
/src/config/wsgi.py:
--------------------------------------------------------------------------------
"""
WSGI entry point for the ``config`` project.

The WSGI callable is published as the module-level name ``application``.

Background reading:
https://docs.djangoproject.com/en/3.0/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

# Keep a DJANGO_SETTINGS_MODULE that the environment already provides;
# otherwise fall back to the project's settings module.
if 'DJANGO_SETTINGS_MODULE' not in os.environ:
    os.environ['DJANGO_SETTINGS_MODULE'] = 'config.settings'

application = get_wsgi_application()
17 |
--------------------------------------------------------------------------------
/src/firefox-extensions/i_dont_care_about_cookies-3.1.3-an+fx.xpi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glovebx/screenshots/6d328fdbbfe9076a70dedd3117005d1a5f1d9438/src/firefox-extensions/i_dont_care_about_cookies-3.1.3-an+fx.xpi
--------------------------------------------------------------------------------
/src/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Django's command-line utility for administrative tasks."""
3 | import os
4 | import sys
5 |
6 |
def main():
    """Run Django's command-line utility with the arguments in ``sys.argv``.

    Uses ``config.settings`` unless DJANGO_SETTINGS_MODULE is already set.

    Raises:
        ImportError: if Django cannot be imported; the original error is chained.
    """
    if 'DJANGO_SETTINGS_MODULE' not in os.environ:
        os.environ['DJANGO_SETTINGS_MODULE'] = 'config.settings'

    # Imported lazily so that a missing Django produces the hint below rather
    # than a bare import failure at module load.
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc

    execute_from_command_line(sys.argv)
18 |
19 |
20 | if __name__ == '__main__':
21 | main()
22 |
--------------------------------------------------------------------------------
/src/media/cache/.gitignore:
--------------------------------------------------------------------------------
1 | *.jpg
--------------------------------------------------------------------------------
/src/shots/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glovebx/screenshots/6d328fdbbfe9076a70dedd3117005d1a5f1d9438/src/shots/__init__.py
--------------------------------------------------------------------------------
/src/shots/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | from .models import ScreenShot
3 |
4 |
def reset_status(modeladmin, request, queryset):
    """Admin action: set every selected screenshot's status to ScreenShot.NEW."""
    queryset.update(status=ScreenShot.NEW)


reset_status.short_description = "Reset status to NEW (Refresh Images)"


def reset_status_to_failed(modeladmin, request, queryset):
    """Admin action: set every selected screenshot's status to ScreenShot.FAILURE."""
    queryset.update(status=ScreenShot.FAILURE)


reset_status_to_failed.short_description = "Mark all as failed"
13 |
class ScreenShotAdmin(admin.ModelAdmin):
    """Admin options for ScreenShot: list columns, filters, search and bulk actions."""

    # Change-list columns.
    list_display = ('url', 'status', 'format', 'keywords', 'created_at')
    # Sidebar filters and search-box fields.
    list_filter = ('status', 'format')
    search_fields = ('url', 'keywords')
    # Displayed on the edit form but not editable there.
    readonly_fields = ('width', 'height', 'duration', 'format')

    # Bulk status changes offered in the change list's action menu.
    actions = [reset_status, reset_status_to_failed]
21 |
22 |
23 | admin.site.register(ScreenShot, ScreenShotAdmin)
24 |
--------------------------------------------------------------------------------
/src/shots/apps.py:
--------------------------------------------------------------------------------
1 | from django.apps import AppConfig
2 |
3 |
class ShotsConfig(AppConfig):
    """Application configuration for the ``shots`` app."""

    name = "shots"
6 |
--------------------------------------------------------------------------------
/src/shots/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glovebx/screenshots/6d328fdbbfe9076a70dedd3117005d1a5f1d9438/src/shots/management/__init__.py
--------------------------------------------------------------------------------
/src/shots/management/commands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glovebx/screenshots/6d328fdbbfe9076a70dedd3117005d1a5f1d9438/src/shots/management/commands/__init__.py
--------------------------------------------------------------------------------
/src/shots/management/commands/cleanup_7day_old.py:
--------------------------------------------------------------------------------
1 | from datetime import timedelta
2 | from time import sleep
3 | from django.core.management.base import BaseCommand, CommandError
4 | from django.utils import timezone
5 | from shots.models import ScreenShot
6 |
7 |
class Command(BaseCommand):
    """Long-running command that purges ScreenShot rows older than seven days, once a day."""

    help = 'Remove screenshots more than 7 days old'

    # Rows created earlier than now - RETENTION are deleted.
    RETENTION = timedelta(days=7)
    # Pause between two purge passes, in seconds (one day).
    INTERVAL_SECONDS = 86400

    def handle(self, *args, **options):
        """Loop forever: delete expired screenshots, report the count, sleep a day.

        This never returns on its own; the process is expected to be stopped
        externally.
        """
        while True:
            cutoff = timezone.now() - self.RETENTION
            # delete() returns (total_deleted, per_model_breakdown); only the
            # total is reported.
            # NOTE(review): a bulk queryset delete removes database rows only;
            # confirm whether the stored image files need separate cleanup.
            count, _ = ScreenShot.objects.filter(created_at__lt=cutoff).delete()
            self.stdout.write(self.style.SUCCESS(f'Deleted {count} screenshots'))

            sleep(self.INTERVAL_SECONDS)
--------------------------------------------------------------------------------
/src/shots/management/commands/screenshot_worker.py:
--------------------------------------------------------------------------------
1 | from django.core.management.base import BaseCommand, CommandError
2 | from selenium import webdriver
3 | from time import sleep
4 | from shots.models import ScreenShot
5 | from django.conf import settings
6 |
class Command(BaseCommand):
    """Polling worker that renders NEW screenshots with headless Chrome."""

    help = 'Run the screenshot worker'

    def handle(self, *args, **options):
        """Poll every 3 seconds for a NEW screenshot and process one per iteration.

        The row is marked PENDING while it is rendered, then SUCCESS. If the
        browser raises a WebDriverException the row is marked FAILURE and the
        loop keeps running instead of crashing with the row stuck in PENDING.
        """
        # Imported locally so this block does not depend on a new file-level import.
        from selenium.common.exceptions import WebDriverException

        while True:
            sleep(3)

            # first() fetches at most one row in a single query (replaces the
            # separate count() + [0] round trips).
            shot = ScreenShot.objects.filter(status=ScreenShot.NEW).first()
            if shot is None:
                continue

            self.stdout.write(self.style.SUCCESS(f'Screenshot started: {shot.url}'))

            shot.status = ScreenShot.PENDING
            shot.save()

            try:
                self.get_screenshot(shot)
            except WebDriverException as e:
                shot.status = ScreenShot.FAILURE
                shot.save()
                self.stdout.write(self.style.ERROR(f'Error: {e}'))
                continue

            shot.status = ScreenShot.SUCCESS
            shot.save()

            self.stdout.write(self.style.SUCCESS(f'Screenshot saved: {shot.url}'))

    def get_screenshot(self, shot):
        """Render ``shot.url`` in headless Chrome and write ``<IMAGE_DIR>/<shot.id>.png``.

        The window is first opened at 1280x960, then resized to the page's
        ``document.body.scrollHeight`` plus 100 pixels so the capture covers
        the full page. The driver is always shut down, even when a step fails.
        """
        options = webdriver.ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')

        driver = webdriver.Chrome(options=options)
        try:
            driver.set_window_size(1280, 960)
            driver.get(shot.url)

            height = driver.execute_script("return document.body.scrollHeight")
            driver.set_window_size(1280, height + 100)
            # Fixed pause after the resize before capturing.
            sleep(10)
            driver.save_screenshot(f"{settings.IMAGE_DIR}/{shot.id}.png")
        finally:
            # Guarantee the browser process is released on every path.
            driver.quit()
44 |
--------------------------------------------------------------------------------
/src/shots/management/commands/screenshot_worker_ff.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 |
3 | import requests
4 | from django.core.files import File
5 | from django.core.management.base import BaseCommand, CommandError
6 | from django.utils import timezone
7 | from selenium import webdriver
8 | from selenium.webdriver.firefox.options import Options
9 | from selenium.common.exceptions import NoSuchElementException, WebDriverException, TimeoutException
10 | from django.conf import settings
11 | from time import sleep
12 | from shots.models import ScreenShot
13 | from datetime import datetime
14 | import random
15 | from PIL import Image
16 | import io
17 | from django.core.cache import cache
18 | from django.core.cache import caches
19 | from sentry_sdk import capture_exception
20 | import boto3
21 |
22 |
class ScreenShotException(Exception):
    """Raised when a screenshot could not be produced for a requested URL."""
25 |
26 |
class SeleniumScreenShot(object):
    """Value holder for the outcome of one browser capture.

    Attributes:
        height: page height measured in the browser.
        title: document title.
        description: meta description (the worker falls back to the title).
        file: encoded image bytes.
    """

    def __init__(self, height, title, description, file):
        self.height, self.title = height, title
        self.description, self.file = description, file
33 |
34 | """
35 | def upload_to_s3(file_bytes, s3_object_name):
36 |
37 | # required because we are on scaleway at the moment
38 | s3 = boto3.client('s3',
39 | region_name=settings.S3_REGION_NAME,
40 | endpoint_url=settings.S3_ENDPOINT_URL,
41 | aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
42 | aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY
43 | )
44 |
45 | s3.put_object(
46 | Body=file_bytes,
47 | Bucket=f"{settings.S3_BUCKET_PREFIX}",
48 | Key=s3_object_name,
49 | ACL='public-read',
50 | CacheControl='max-age=31556926' # 1 year
51 | )
52 | """
53 |
def convert_png_to_jpg(binary_data) -> bytes:
    """Re-encode PNG image bytes as JPEG bytes.

    The image is converted to RGB before saving as JPEG.

    Args:
        binary_data: raw bytes of the source image.

    Returns:
        The JPEG-encoded image as bytes.
    """
    with io.BytesIO(binary_data) as png_stream, \
            Image.open(png_stream).convert('RGB') as rgb_image, \
            io.BytesIO() as jpeg_stream:
        rgb_image.save(jpeg_stream, format="JPEG")
        return jpeg_stream.getvalue()
63 |
class Command(BaseCommand):
    """Polling worker that renders NEW screenshots with headless Firefox.

    For each claimed screenshot the worker captures the page, stores the JPEG
    on the model's file field, records title/description metadata and, when a
    callback URL is set, posts a webhook.
    """

    help = 'Run the screenshot worker'

    # Upper bound, in seconds, for the webhook POST so an unresponsive
    # receiver cannot stall the worker loop indefinitely.
    WEBHOOK_TIMEOUT_SECONDS = 30

    def handle(self, *args, **options):
        """Loop forever, processing at most one NEW screenshot per iteration."""
        self.stdout.write(self.style.SUCCESS('Starting Screenshot Firefox Worker.'))
        while True:
            # Random jitter spreads out polling when multiple workers are present.
            sleep(random.randrange(1, 10))

            started_at = timezone.now()
            shot = ScreenShot.objects.filter(status=ScreenShot.NEW).first()
            if shot is None:
                continue

            # Claim the row with a conditional UPDATE: only one worker can move
            # it from NEW to PENDING, so a second worker that selected the same
            # row sees 0 rows updated and skips it.
            claimed = ScreenShot.objects.filter(
                pk=shot.pk, status=ScreenShot.NEW
            ).update(status=ScreenShot.PENDING)
            if not claimed:
                continue
            shot.status = ScreenShot.PENDING

            self.stdout.write(self.style.SUCCESS(f'Screenshot started: {shot.url}'))

            # Drop cached data for this screenshot and the cached pages.
            cache.delete(shot.id.hex)
            caches['page'].clear()

            try:
                results = self.get_screenshot(shot)
                shot.status = ScreenShot.SUCCESS
                shot.height = results.height
                # Elapsed wall-clock seconds, computed from aware datetimes
                # instead of the platform-specific strftime('%s').
                shot.duration = int((timezone.now() - started_at).total_seconds())

                with tempfile.TemporaryFile(mode="w+b") as f:
                    f.write(results.file)
                    shot.file.save(f"{shot.id.hex}.jpg", File(f))

                # JSON Fields
                shot.meta = {
                    'title': results.title,
                    'description': results.description
                }

                shot.save()

                self.stdout.write(self.style.SUCCESS(f'Screenshot saved: {shot.url} {shot.duration} seconds'))
            except ScreenShotException as e:
                shot.status = ScreenShot.FAILURE
                shot.save()
                self.stdout.write(self.style.ERROR(f'Error: {e}'))
            else:
                self.do_webhook(shot)

    def do_webhook(self, shot):
        """POST a JSON summary of ``shot`` to ``shot.callback_url``, if one is set.

        Delivery is best-effort: a network error or timeout is reported to
        Sentry and logged, but never propagates to the worker loop.
        """
        if not shot.callback_url:
            return

        meta = shot.meta or {}
        payload = {
            'id': shot.id.hex,
            'url': shot.url,
            'callback_url': shot.callback_url,
            'created_at': shot.created_at.strftime("%Y-%m-%dT%H:%M:%S%z"),
            'image_url': shot.s3_url,
            'title': meta.get('title'),
            'description': meta.get('description')
        }

        headers = {
            'content-type': 'application/json'
        }

        try:
            requests.post(
                shot.callback_url,
                json=payload,
                headers=headers,
                timeout=self.WEBHOOK_TIMEOUT_SECONDS,
            )
        except requests.RequestException as e:
            capture_exception(e)
            self.stdout.write(self.style.ERROR(f'Webhook failed: {shot.url} to {shot.callback_url}: {e}'))
            return

        self.stdout.write(self.style.SUCCESS(f'Fired Webhook: {shot.url} to {shot.callback_url}'))

    def get_screenshot(self, shot) -> SeleniumScreenShot:
        """Capture ``shot.url`` in headless Firefox and return the result.

        Returns:
            SeleniumScreenShot with the measured page height, the document
            title, the meta description (falling back to the title) and the
            JPEG bytes.

        Raises:
            ScreenShotException: if any WebDriver step fails (page load,
                scripting, capture). The original error is sent to Sentry.
        """
        profile = webdriver.FirefoxProfile()
        profile.set_preference("layout.css.devPixelsPerPx", str(shot.dpi))

        if settings.SOCKS5_PROXY_ENABLED:
            self.stdout.write(self.style.SUCCESS(f'Proxy enabled: {settings.SOCKS5_PROXY_HOSTNAME}:{settings.SOCKS5_PROXY_PORT}'))
            profile.set_preference('network.proxy.type', 1)
            profile.set_preference("network.proxy.socks_version", 5)
            profile.set_preference('network.proxy.socks', settings.SOCKS5_PROXY_HOSTNAME)

            # explicit casting to int because otherwise it is ignored and fails silently.
            profile.set_preference('network.proxy.socks_port', int(settings.SOCKS5_PROXY_PORT))
            profile.set_preference("network.proxy.socks_remote_dns", True)

        profile.set_preference("dom.webnotifications.enabled", False)
        profile.set_preference("dom.push.enabled", False)

        options = Options()
        options.headless = True

        driver = webdriver.Firefox(options=options, firefox_profile=profile)
        try:
            driver.install_addon(f'{settings.BASE_DIR}/firefox-extensions/i_dont_care_about_cookies-3.1.3-an+fx.xpi')
            driver.set_page_load_timeout(60)
            driver.set_window_size(shot.width, shot.height)

            driver.get(shot.url)

            # Scroll to the bottom repeatedly (one second apart), re-measuring
            # the page height on each pass.
            for _ in range(10):
                doc_element_height = driver.execute_script("return document.documentElement.scrollHeight")
                doc_body_height = driver.execute_script("return document.body.scrollHeight")
                height = max(doc_element_height, doc_body_height)
                driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
                sleep(1)

            # some sites like pandora and statesman.com have error/GDPR pages that are shorter than
            # a normal screen.
            if height > shot.height:
                driver.set_window_size(shot.width, height + 100)

            sleep(shot.sleep_seconds)  # this might not be necessary, but needs testing

            image_binary = convert_png_to_jpg(driver.get_screenshot_as_png())

            title = driver.title
            try:
                description = driver.find_element_by_xpath("//meta[@name='description']").get_attribute("content")
            except NoSuchElementException:
                description = title
        except WebDriverException as e:
            capture_exception(e)
            # Keep the browser's message so the worker log says why it failed.
            raise ScreenShotException(str(e)) from e
        finally:
            # Always release the browser process, on success and on failure.
            driver.quit()

        return SeleniumScreenShot(height=height, title=title, description=description, file=image_binary)
--------------------------------------------------------------------------------
/src/shots/migrations/0001_initial.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-01-27 14:07
2 |
3 | from django.db import migrations, models
4 | import uuid
5 |
6 |
class Migration(migrations.Migration):
    """Initial migration: creates the ScreenShot model (UUID id, url, status)."""

    initial = True

    dependencies = [
    ]

    operations = [
        migrations.CreateModel(
            name='ScreenShot',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('url', models.TextField()),
                ('status', models.CharField(choices=[('N', 'New'), ('P', 'Pending'), ('S', 'Success'), ('F', 'Failed'), ('R1', 'Retry #1'), ('R2', 'Retry #2'), ('R3', 'Retry #3')], default='N', max_length=2)),
            ],
        ),
    ]
24 |
--------------------------------------------------------------------------------
/src/shots/migrations/0002_screenshot_is_fullpage.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-01-27 15:03
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds the boolean ``is_fullpage`` field (default False) to ScreenShot."""

    dependencies = [
        ('shots', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='is_fullpage',
            field=models.BooleanField(default=False),
        ),
    ]
19 |
--------------------------------------------------------------------------------
/src/shots/migrations/0003_auto_20200127_2029.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-01-27 20:29
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds integer ``height`` (default 768) and ``width`` (default 1366) to ScreenShot."""

    dependencies = [
        ('shots', '0002_screenshot_is_fullpage'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='height',
            field=models.IntegerField(default=768),
        ),
        migrations.AddField(
            model_name='screenshot',
            name='width',
            field=models.IntegerField(default=1366),
        ),
    ]
24 |
--------------------------------------------------------------------------------
/src/shots/migrations/0004_screenshot_keywords.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-01-27 21:29
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds the optional ``keywords`` char field (max 250 chars) to ScreenShot."""

    dependencies = [
        ('shots', '0003_auto_20200127_2029'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='keywords',
            field=models.CharField(blank=True, max_length=250, null=True),
        ),
    ]
19 |
--------------------------------------------------------------------------------
/src/shots/migrations/0005_screenshot_raw_html.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-01-31 09:40
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds the optional ``raw_html`` text field to ScreenShot."""

    dependencies = [
        ('shots', '0004_screenshot_keywords'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='raw_html',
            field=models.TextField(blank=True, null=True),
        ),
    ]
19 |
--------------------------------------------------------------------------------
/src/shots/migrations/0006_auto_20200204_1043.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-02-04 10:43
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Removes ``is_fullpage`` and changes ``url`` to a CharField capped at 500 chars."""

    dependencies = [
        ('shots', '0005_screenshot_raw_html'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='screenshot',
            name='is_fullpage',
        ),
        migrations.AlterField(
            model_name='screenshot',
            name='url',
            field=models.CharField(max_length=500),
        ),
    ]
23 |
--------------------------------------------------------------------------------
/src/shots/migrations/0007_screenshot_duration.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-02-07 00:15
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds the optional integer ``duration`` field to ScreenShot."""

    dependencies = [
        ('shots', '0006_auto_20200204_1043'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='duration',
            field=models.IntegerField(blank=True, null=True),
        ),
    ]
19 |
--------------------------------------------------------------------------------
/src/shots/migrations/0008_auto_20200207_1203.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-02-07 12:03
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Changes ``url`` from a CharField to a URLField (max 500 chars)."""

    dependencies = [
        ('shots', '0007_screenshot_duration'),
    ]

    operations = [
        migrations.AlterField(
            model_name='screenshot',
            name='url',
            field=models.URLField(max_length=500),
        ),
    ]
19 |
--------------------------------------------------------------------------------
/src/shots/migrations/0009_auto_20200209_1605.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-02-09 16:05
2 |
3 | import django.core.validators
4 | from django.db import migrations, models
5 | import shots.models
6 | import shots.validators
7 |
8 |
class Migration(migrations.Migration):
    """Attaches URLValidator and ``validate_hostname_dns`` as validators on ``url``."""

    dependencies = [
        ('shots', '0008_auto_20200207_1203'),
    ]

    operations = [
        migrations.AlterField(
            model_name='screenshot',
            name='url',
            field=models.URLField(max_length=500, validators=[django.core.validators.URLValidator(), shots.validators.validate_hostname_dns]),
        ),
    ]
22 |
--------------------------------------------------------------------------------
/src/shots/migrations/0010_auto_20200209_2152.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-02-09 21:52
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds optional text fields ``base64_full`` and ``base64_thumb`` to ScreenShot."""

    dependencies = [
        ('shots', '0009_auto_20200209_1605'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='base64_full',
            field=models.TextField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='screenshot',
            name='base64_thumb',
            field=models.TextField(blank=True, null=True),
        ),
    ]
24 |
--------------------------------------------------------------------------------
/src/shots/migrations/0011_remove_screenshot_raw_html.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-02-09 22:19
2 |
3 | from django.db import migrations
4 |
5 |
class Migration(migrations.Migration):
    """Removes the ``raw_html`` field from ScreenShot."""

    dependencies = [
        ('shots', '0010_auto_20200209_2152'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='screenshot',
            name='raw_html',
        ),
    ]
18 |
--------------------------------------------------------------------------------
/src/shots/migrations/0012_remove_screenshot_image.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-02-10 00:16
2 |
3 | from django.db import migrations
4 |
5 |
class Migration(migrations.Migration):
    """No-op migration: it declares no operations and only depends on 0011."""

    dependencies = [
        ('shots', '0011_remove_screenshot_raw_html'),
    ]

    # Intentionally empty in the source; nothing is changed by this migration.
    operations = [
    ]
14 |
--------------------------------------------------------------------------------
/src/shots/migrations/0013_screenshot_image_binary.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-02-10 08:35
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds the optional binary field ``image_binary`` to ScreenShot."""

    dependencies = [
        ('shots', '0012_remove_screenshot_image'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='image_binary',
            field=models.BinaryField(blank=True, null=True),
        ),
    ]
19 |
--------------------------------------------------------------------------------
/src/shots/migrations/0014_auto_20200211_1242.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.2 on 2020-02-11 12:42
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Removes ``base64_full``/``base64_thumb`` and adds ``format`` (Desktop/Mobile, default Desktop)."""

    dependencies = [
        ('shots', '0013_screenshot_image_binary'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='screenshot',
            name='base64_full',
        ),
        migrations.RemoveField(
            model_name='screenshot',
            name='base64_thumb',
        ),
        migrations.AddField(
            model_name='screenshot',
            name='format',
            field=models.CharField(choices=[('D', 'Desktop'), ('M', 'Mobile')], default='D', max_length=1),
        ),
    ]
27 |
--------------------------------------------------------------------------------
/src/shots/migrations/0015_auto_20200213_1418.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.3 on 2020-02-13 14:18
2 |
3 | from django.db import migrations, models
4 | import django.utils.timezone
5 |
6 |
class Migration(migrations.Migration):
    """Adds the ``created_at`` timestamp (``auto_now_add``) to ScreenShot."""

    dependencies = [
        ('shots', '0014_auto_20200211_1242'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='created_at',
            # The timezone.now default is a one-off value for adding the column;
            # preserve_default=False keeps it out of the resulting field state.
            field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
            preserve_default=False,
        ),
    ]
21 |
--------------------------------------------------------------------------------
/src/shots/migrations/0016_auto_20200228_1053.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.3 on 2020-02-28 10:53
2 |
3 | import django.core.validators
4 | from django.db import migrations, models
5 | import shots.validators
6 |
7 |
class Migration(migrations.Migration):
    """Adds optional ``callback_url`` (validated URL) and ``created_with`` (API/Browser, default Browser)."""

    dependencies = [
        ('shots', '0015_auto_20200213_1418'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='callback_url',
            field=models.URLField(blank=True, max_length=500, null=True, validators=[django.core.validators.URLValidator(), shots.validators.validate_hostname_dns]),
        ),
        migrations.AddField(
            model_name='screenshot',
            name='created_with',
            field=models.CharField(choices=[('A', 'API'), ('B', 'Browser')], default='B', max_length=1),
        ),
    ]
26 |
--------------------------------------------------------------------------------
/src/shots/migrations/0017_screenshot_sleep_seconds.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.3 on 2020-03-07 23:17
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds the integer ``sleep_seconds`` field (default 5) to ScreenShot."""

    dependencies = [
        ('shots', '0016_auto_20200228_1053'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='sleep_seconds',
            field=models.IntegerField(default=5),
        ),
    ]
19 |
--------------------------------------------------------------------------------
/src/shots/migrations/0018_screenshot_dpi.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.3 on 2020-03-08 02:03
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds the decimal ``dpi`` field (2 digits, 1 decimal place, default 1.0) to ScreenShot."""

    dependencies = [
        ('shots', '0017_screenshot_sleep_seconds'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='dpi',
            field=models.DecimalField(decimal_places=1, default=1.0, max_digits=2),
        ),
    ]
19 |
--------------------------------------------------------------------------------
/src/shots/migrations/0019_auto_20200315_1818.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.3 on 2020-03-15 18:18
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds optional text fields ``description`` and ``title`` to ScreenShot."""

    dependencies = [
        ('shots', '0018_screenshot_dpi'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='description',
            field=models.TextField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='screenshot',
            name='title',
            field=models.TextField(blank=True, null=True),
        ),
    ]
24 |
--------------------------------------------------------------------------------
/src/shots/migrations/0020_auto_20200315_2020.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.3 on 2020-03-15 20:20
2 |
3 | import django.contrib.postgres.fields.jsonb
4 | from django.db import migrations
5 |
6 |
class Migration(migrations.Migration):
    """Removes ``description`` and ``title`` and adds an optional PostgreSQL JSON field ``meta``.

    No data migration is included, so existing values of the removed columns
    are not copied into ``meta``.
    """

    dependencies = [
        ('shots', '0019_auto_20200315_1818'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='screenshot',
            name='description',
        ),
        migrations.RemoveField(
            model_name='screenshot',
            name='title',
        ),
        migrations.AddField(
            model_name='screenshot',
            name='meta',
            field=django.contrib.postgres.fields.jsonb.JSONField(blank=True, null=True),
        ),
    ]
28 |
--------------------------------------------------------------------------------
/src/shots/migrations/0021_screenshot_file.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.3 on 2020-03-21 09:18
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Adds the optional ``file`` FileField (empty ``upload_to``) to ScreenShot."""

    dependencies = [
        ('shots', '0020_auto_20200315_2020'),
    ]

    operations = [
        migrations.AddField(
            model_name='screenshot',
            name='file',
            field=models.FileField(blank=True, null=True, upload_to=''),
        ),
    ]
19 |
--------------------------------------------------------------------------------
/src/shots/migrations/0022_remove_screenshot_image_binary.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 3.0.3 on 2020-03-21 18:24
2 |
3 | from django.db import migrations
4 |
5 |
class Migration(migrations.Migration):
    """Removes the ``image_binary`` field from ScreenShot."""

    dependencies = [
        ('shots', '0021_screenshot_file'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='screenshot',
            name='image_binary',
        ),
    ]
18 |
--------------------------------------------------------------------------------
/src/shots/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/glovebx/screenshots/6d328fdbbfe9076a70dedd3117005d1a5f1d9438/src/shots/migrations/__init__.py
--------------------------------------------------------------------------------
/src/shots/models.py:
--------------------------------------------------------------------------------
1 | from django.contrib.postgres.fields import JSONField
2 | from django.db import models
3 | import uuid
4 | from django.conf import settings
5 | from django.core import validators
6 | from shots.validators import validate_hostname_dns
7 | from django.shortcuts import reverse
8 |
9 |
class ScreenShot(models.Model):
    """A requested website screenshot: the target URL, capture settings, status and result."""

    # Status codes stored in ``status``.
    NEW = 'N'
    PENDING = 'P'
    SUCCESS = 'S'
    FAILURE = 'F'
    RETRY_1 = 'R1'
    RETRY_2 = 'R2'
    RETRY_3 = 'R3'
    STATUS_CHOICES = (
        (NEW, 'New'),
        (PENDING, 'Pending'),
        (SUCCESS, 'Success'),
        (FAILURE, 'Failed'),
        (RETRY_1, 'Retry #1'),
        (RETRY_2, 'Retry #2'),
        (RETRY_3, 'Retry #3'),
    )

    # Codes stored in ``format``.
    DESKTOP = 'D'
    MOBILE = 'M'
    FORMAT_CHOICES = (
        (DESKTOP, 'Desktop'),
        (MOBILE, 'Mobile'),
    )

    # Codes stored in ``created_with``.
    BROWSER = 'B'
    API = 'A'
    CREATED_WITH_CHOICES = (
        (API, 'API'),
        (BROWSER, 'Browser')
    )

    # Random UUID primary key, not editable.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Page to capture; checked by URLValidator and the project's hostname/DNS validator.
    url = models.URLField(max_length=500, validators=[validators.URLValidator(), validate_hostname_dns ])
    status = models.CharField(max_length=2, choices=STATUS_CHOICES, default=NEW)
    # Browser window size used for the capture; the Firefox worker overwrites
    # ``height`` with the measured page height on success.
    width = models.IntegerField(default=1366)
    height = models.IntegerField(default=768)
    keywords = models.CharField(blank=True, null=True, max_length=250)
    # Seconds the capture took, as recorded by the Firefox worker.
    duration = models.IntegerField(null=True, blank=True)
    format = models.CharField(max_length=1, choices=FORMAT_CHOICES, default=DESKTOP)
    created_at = models.DateTimeField(auto_now_add=True)
    created_with = models.CharField(max_length=1, choices=CREATED_WITH_CHOICES, default=BROWSER)
    # Optional webhook target; the Firefox worker POSTs here after a successful capture.
    callback_url = models.URLField(null=True, blank=True, max_length=500,
                                   validators=[validators.URLValidator(), validate_hostname_dns ])
    # Seconds the Firefox worker waits before taking the capture.
    sleep_seconds = models.IntegerField(default=5)
    # Passed by the Firefox worker to the ``layout.css.devPixelsPerPx`` preference.
    dpi = models.DecimalField(default=1.0, decimal_places=1, max_digits=2)
    # JSON blob; the Firefox worker stores ``title`` and ``description`` here.
    meta = JSONField(null=True, blank=True)
    # Stored image; the Firefox worker saves it as ``<id hex>.jpg``.
    file = models.FileField(null=True, blank=True)

    @property
    def resolution(self):
        """Return the window size as a ``"<width>x<height>"`` string."""
        return f"{self.width}x{self.height}"

    def get_absolute_url(self):
        """Return the URL of the ``screenshot_get`` route for this screenshot."""
        return reverse("screenshot_get", kwargs={"id": self.id})

    @property
    def s3_url(self):
        """Return ``<S3_ENDPOINT_URL>/<S3_BUCKET_PREFIX>/<id hex>.jpg`` built from settings.

        The URL is composed from settings only; it does not check that the
        object exists.
        """
        return f"{settings.S3_ENDPOINT_URL}/{settings.S3_BUCKET_PREFIX}/{self.id.hex}.jpg"
70 |
--------------------------------------------------------------------------------
/src/shots/templates/about.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}{% endblock %}
4 | {% block meta_description %}{% endblock %}
5 |
6 | {% block content %}
7 |
8 |
About Screenshots
9 |
10 |
11 | Hello there! Thanks for checking out ScreenShots. It is an experiment
12 | where I explore what it takes to build a robust screenshot tool
13 | that can handle the complexity of today's websites. Please feel free
14 | to try it out and make your own.
15 |
16 |
17 |
18 | Feel free to submit bug reports on this
19 | GitHub page.
21 |
22 |
23 |
24 |
25 |
26 |
Technical details of this project
27 | People are often curious about the technical details of the project such as tech stack and
28 | hosting.
29 |
30 |
Tech Stack
31 |
32 |
Virtual Machine (3-core VM/2Gig RAM)
33 |
Ubuntu Linux 18.04
34 |
Docker
35 |
Traefik for TLS and reverse proxy
36 |
Django
37 |
PostgreSQL
38 |
Selenium (For screenshots)
39 |
Tacit CSS
40 |
41 |
42 |
Hosting
43 |
Screenshots is hosted at Scaleway,
44 | a cloud services provider based in France. They offer budget-hosting solutions like
45 | the one I use here.
46 |
47 |
The server on which I run this service, the
48 | DEV1-M
49 | is a 3-core VM with 4Gigs of RAM and 20G local storage.
13 |
14 |
46 |
47 | This is a simple utility to get a website screenshot. Just
48 | plug in the URL, wait a few seconds, and then get a snapshot
49 | of the desired website
50 |
51 |
52 |