├── .coveragerc ├── .dockerignore ├── .github └── workflows │ └── main.yml ├── .gitignore ├── .gitmodules ├── .travis.yml ├── CHANGES.rst ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile.buildbot ├── README.rst ├── benchmark └── notebooks │ └── Benchmark PNG vs JPEG.ipynb ├── bin └── splash ├── codecov.yml ├── conftest.py ├── debian ├── changelog ├── compat ├── control ├── copyright ├── dirs ├── postinst ├── rules └── upstart ├── dockerfiles ├── splash-jupyter │ └── Dockerfile ├── splash │ ├── build-qtwebkit.sh │ ├── create-package.sh │ ├── download-pyqt5.sh │ ├── download-qt-installer.sh │ ├── download-qtwebkit-source.sh │ ├── download-qtwebkit.sh │ ├── install-flash.sh │ ├── install-fonts.sh │ ├── install-pyqt5.sh │ ├── install-pyqtwebengine.sh │ ├── install-python-splash-deps.sh │ ├── install-qtwebengine-deps.sh │ ├── install-qtwebkit-build-deps.sh │ ├── install-qtwebkit-deps.sh │ ├── install-qtwebkit.sh │ ├── install-system-deps.sh │ ├── install-system-splash-deps.sh │ ├── prepare-install.sh │ ├── qt-installer-noninteractive.qs │ └── run-qt-installer.sh └── tests │ ├── Dockerfile │ └── runtests.sh ├── docs ├── Makefile ├── api.rst ├── changes.rst ├── conf.py ├── contributing.rst ├── faq.rst ├── index.rst ├── install.rst ├── internals │ ├── index.rst │ └── js-python-lua.rst ├── kernel.rst ├── make.bat ├── requirements.txt ├── scripting-binary-data.rst ├── scripting-element-object.rst ├── scripting-libs.rst ├── scripting-overview.rst ├── scripting-ref.rst ├── scripting-request-object.rst ├── scripting-response-object.rst └── scripting-tutorial.rst ├── requirements-dev.txt ├── requirements-jupyter.txt ├── requirements.txt ├── runsplash-docker.sh ├── runtests-docker.sh ├── scripts └── rst2inspections.py ├── setup.cfg ├── setup.py └── splash ├── __init__.py ├── _cmdline_utils.py ├── argument_cache.py ├── browser_tab.py ├── casperjs_utils.py ├── conftest.py ├── cookies.py ├── defaults.py ├── engines ├── __init__.py ├── chromium │ ├── __init__.py │ ├── 
browser_tab.py │ ├── constants.py │ ├── render_scripts.py │ ├── screenshot.py │ └── webpage.py └── webkit │ ├── __init__.py │ ├── browser_tab.py │ ├── http_client.py │ ├── render_scripts.py │ ├── screenshot.py │ ├── webpage.py │ └── webview.py ├── errors.py ├── examples ├── block-css.lua ├── call-later.lua ├── count-divs.lua ├── disable-images.lua ├── element-screenshot.lua ├── har.lua ├── log-locked-requests.lua ├── log-requests.lua ├── multiple-pages.lua ├── preload-functions.lua ├── preload-jquery.lua ├── render-multiple.lua ├── render-png.lua ├── return-title.lua ├── run-js.lua ├── scroll.lua ├── splash-haproxy.conf ├── submit-search.lua ├── wait-for-element.lua └── with-timeout.lua ├── har ├── __init__.py ├── log.py ├── qt.py ├── schema.py └── utils.py ├── har_builder.py ├── html_element.py ├── jsutils.py ├── kernel ├── __init__.py ├── __main__.py ├── completer.py ├── errors.py ├── inspections.py ├── inspections │ ├── README.rst │ ├── splash-auto.json │ └── splash.json ├── kernel.py ├── kernelbase.py ├── kernels │ ├── splash-py2 │ │ ├── kernel.json │ │ ├── logo-32x32.png │ │ └── logo-64x64.png │ └── splash-py3 │ │ ├── kernel.json │ │ ├── logo-32x32.png │ │ └── logo-64x64.png └── lua_parser.py ├── log.py ├── lua.py ├── lua_modules ├── completer.lua ├── element.lua ├── event.lua ├── extras.lua ├── libs │ ├── base64.lua │ ├── json.lua │ └── treat.lua ├── repr.lua ├── request.lua ├── response.lua ├── sandbox.lua ├── splash.lua ├── vendor │ ├── LICENSE-LEXER.md │ ├── inspect.lua │ └── lexer.lua └── wraputils.lua ├── lua_runner.py ├── lua_runtime.py ├── network_manager.py ├── pool.py ├── proxy.py ├── qtrender_image.py ├── qtrender_lua.py ├── qtutils.py ├── render_options.py ├── render_scripts.py ├── request_middleware.py ├── resources.py ├── response_middleware.py ├── sentry.py ├── server.py ├── tests ├── __init__.py ├── conftest.py ├── filters │ ├── noscript.txt │ └── noscript2.txt ├── js_profiles │ └── test │ │ ├── lib1.js │ │ ├── lib2.js │ │ └── lib_utf8.js ├── 
lua_modules │ ├── emulation.lua │ ├── secret.lua │ ├── utils.lua │ └── utils_patch.lua ├── mockserver.py ├── proxies.py ├── proxy_profiles │ ├── no-proxy-settings.ini │ ├── test.ini │ └── test_deprecated.ini ├── server.pem ├── stress.py ├── test_argument_caching.py ├── test_callback_proxy.py ├── test_client_disconnects.py ├── test_completer.py ├── test_execute.py ├── test_execute_callbacks.py ├── test_execute_element.py ├── test_execute_emulation.py ├── test_execute_libraries.py ├── test_execute_request_filters.py ├── test_har.py ├── test_har_utils.py ├── test_jsonpost.py ├── test_jupyter.py ├── test_lua_parser.py ├── test_lua_utils.py ├── test_proxy.py ├── test_qtutils.py ├── test_redirects.py ├── test_render.py ├── test_render_chromium.py ├── test_request_body_lua.py ├── test_request_filters.py ├── test_response_middleware.py ├── test_response_tracking.py ├── test_run.py ├── test_runjs.py ├── test_ui.py ├── test_utils.py ├── this-is-not-a-proxy-profile.ini └── utils.py ├── ui ├── favicon.ico ├── main.js └── style.css ├── utils.py └── xvfb.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = true 3 | include = splash/* 4 | omit = 5 | splash/tests/* 6 | splash/vendor/* 7 | splash/conftest.py 8 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | dev/ 4 | .ipynb_checkpoints/ 5 | debian/ 6 | scripts/ 7 | benchmark/ 8 | notebooks/ 9 | splash.egg-info/ 10 | docs/ 11 | examples/ 12 | *.pyc 13 | __pycache__ 14 | .idea 15 | splash/vendor/harviewer/build-tools 16 | splash/vendor/harviewer/releases 17 | splash/vendor/harviewer/selenium 18 | splash/vendor/harviewer/wordpress 19 | splash/vendor/harviewer/requirejs 20 | .git/ 21 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: 
-------------------------------------------------------------------------------- 1 | name: CI 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | build: 6 | runs-on: ubuntu-latest 7 | 8 | steps: 9 | - 10 | name: Checkout 11 | uses: actions/checkout@v2.3.4 12 | - 13 | name: Build base image 14 | uses: docker/build-push-action@v2.7.0 15 | with: 16 | context: . 17 | tags: splash 18 | - 19 | name: Build tests image 20 | uses: docker/build-push-action@v2.7.0 21 | with: 22 | context: dockerfiles/tests 23 | tags: splash-tests 24 | - 25 | name: Run tests 26 | shell: 'script --return --quiet --command "bash {0}"' 27 | run: | 28 | docker run -it splash-tests 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # distutils/setuptools files 2 | build 3 | dist 4 | splash.egg-info 5 | docs/_build 6 | 7 | # temp files & IDE settings 8 | _trial_temp 9 | *.pyc 10 | .idea 11 | .ipynb_checkpoints 12 | .splash-cache* 13 | .coverage 14 | env* 15 | .cache/ 16 | htmlcov/ 17 | 18 | # IPython notebooks 19 | /notebooks/ 20 | 21 | # Dev files 22 | /dev/ 23 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "splash/vendor/harviewer"] 2 | path = splash/vendor/harviewer 3 | url = https://github.com/janodvarko/harviewer.git 4 | 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: required 3 | services: 4 | - docker 5 | 6 | branches: 7 | only: 8 | - master 9 | - /^\d\.\d+$/ 10 | 11 | before_install: 12 | - docker build -t splash . 13 | - docker build -t splash-tests -f dockerfiles/tests/Dockerfile . 
14 | 15 | script: 16 | - docker run `bash <(curl -s https://codecov.io/env)` -it splash-tests 17 | 18 | #cache: 19 | # directories: 20 | # - $HOME/.cache/pip 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Scrapinghub 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of Splash nor the names of its contributors may be used 15 | to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.rst 3 | include CHANGES.rst 4 | include docs/Makefile 5 | include docs/make.bat 6 | include docs/conf.py 7 | include requirements.txt 8 | 9 | recursive-include scripts *.py 10 | recursive-include docs *.rst 11 | recursive-include splash/ui *.* 12 | recursive-include splash/examples *.lua 13 | recursive-include splash/lua_modules *.lua *.md *.rst *.txt 14 | recursive-include splash/tests *.txt *.js *.ini *.lua 15 | recursive-include splash/vendor/harviewer/webapp *.js *.html *.css *.gif *.png *.swf *.html 16 | recursive-include splash/kernel/inspections *.json 17 | recursive-include splash/kernel/kernels *.json *.png 18 | -------------------------------------------------------------------------------- /Makefile.buildbot: -------------------------------------------------------------------------------- 1 | export DISPLAY=:0 2 | 3 | clean: 4 | git submodule update --init 5 | 6 | test: 7 | py.test 8 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======================================= 2 | Splash - A javascript rendering service 3 | ======================================= 4 | 5 | .. image:: https://img.shields.io/travis/scrapinghub/splash/master.svg 6 | :alt: Build Status 7 | :target: https://travis-ci.org/scrapinghub/splash 8 | 9 | .. image:: https://img.shields.io/codecov/c/github/scrapinghub/splash/master.svg 10 | :alt: Coverage report 11 | :target: http://codecov.io/github/scrapinghub/splash?branch=master 12 | 13 | .. 
image:: https://img.shields.io/badge/GITTER-join%20chat-green.svg 14 | :alt: Join the chat at https://gitter.im/scrapinghub/splash 15 | :target: https://gitter.im/scrapinghub/splash 16 | 17 | Splash is a javascript rendering service with an HTTP API. It's a lightweight 18 | browser with an HTTP API, implemented in Python 3 using Twisted and Qt 5. 19 | 20 | It's fast, lightweight and stateless, which makes it easy to distribute. 21 | 22 | Documentation 23 | ------------- 24 | 25 | Documentation is available here: 26 | https://splash.readthedocs.io/ 27 | 28 | Using Splash with Scrapy 29 | ------------------------ 30 | 31 | To use Splash with Scrapy, please refer to the `scrapy-splash library`_. 32 | 33 | Support 34 | ------- 35 | 36 | Open source support is provided here on GitHub. Please `create a question 37 | issue`_. 38 | 39 | Commercial support is also available from `Scrapinghub`_. 40 | 41 | .. _create a question issue: https://github.com/scrapinghub/splash/issues/new?labels=question 42 | .. _Scrapinghub: https://scrapinghub.com 43 | .. 
_scrapy-splash library: https://github.com/scrapy-plugins/scrapy-splash 44 | -------------------------------------------------------------------------------- /bin/splash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from splash.server import main 4 | main() 5 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: "header, diff, tree" 3 | 4 | coverage: 5 | status: 6 | project: false 7 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | collect_ignore = ["setup.py"] 2 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | splash (1.0) unstable; urgency=low 2 | 3 | * Initial release. 
4 | 5 | -- Scrapinghub Team Wed, 27 May 2013 11:29:29 -0300 6 | 7 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 7 2 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: splash 2 | Section: misc 3 | Priority: extra 4 | Maintainer: Scrapinghub Team 5 | Build-Depends: debhelper (>= 7) 6 | Standards-Version: 3.8.3 7 | Homepage: http://www.scrapinghub.com/ 8 | 9 | Package: splash 10 | Architecture: all 11 | Depends: ${misc:Depends}, ${python:Depends}, 12 | hub-xvfb, 13 | libqt4-webkit, 14 | python-twisted, 15 | python-qt4, 16 | python-qt4reactor, 17 | libicu48, 18 | python-raven, 19 | python-psutil, 20 | zlib1g-dev, 21 | python-imaging, 22 | openssl 23 | Description: Page thumbnailing service 24 | This is the service that powers webpage thumbnailing for Autoscraping 25 | (templates and annotation runs) 26 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Copyright (C) 2013 Scrapinghub 2 | -------------------------------------------------------------------------------- /debian/dirs: -------------------------------------------------------------------------------- 1 | var/log/splash 2 | etc/splash/proxy-profiles 3 | etc/splash/js-profiles 4 | etc/splash/filters 5 | -------------------------------------------------------------------------------- /debian/postinst: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | case "$1" in 6 | configure) 7 | update-python-modules -p # to upstart update code before triggering code reload 8 | ;; 9 | 10 | *) 11 | echo "postinst called with unknown argument \`$1'" >&2 
12 | exit 1 13 | ;; 14 | esac 15 | 16 | #DEBHELPER# 17 | 18 | exit 0 19 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | # -*- makefile -*- 3 | # Sample debian/rules that uses debhelper. 4 | # This file was originally written by Joey Hess and Craig Small. 5 | # As a special exception, when this file is copied by dh-make into a 6 | # dh-make output file, you may use that output file without restriction. 7 | # This special exception was added by Craig Small in version 0.37 of dh-make. 8 | 9 | # Uncomment this to turn on verbose mode. 10 | #export DH_VERBOSE=1 11 | 12 | %: 13 | dh $@ 14 | -------------------------------------------------------------------------------- /debian/upstart: -------------------------------------------------------------------------------- 1 | start on runlevel [2345] 2 | stop on runlevel [06] 3 | 4 | limit nofile 32768 32768 5 | env DISPLAY=:0 6 | respawn 7 | 8 | script 9 | [ -r /etc/default/splash ] && . 
/etc/default/splash 10 | logdir=${SPLASH_LOGDIR:-/var/log/splash} 11 | proxyprofilespath=${SPLASH_PROXYPROFILESPATH:-/etc/splash/proxy-profiles} 12 | jsprofilespath=${SPLASH_JSPROFILESPATH:-/etc/splash/js-profiles} 13 | filterspath=${SPLASH_FILTERSPATH:-/etc/splash/filters} 14 | maxrss=${SPLASH_MAXRSS:-0.75} 15 | 16 | chown proxy:proxy $logdir $proxyprofilespath $cachepath $jsprofilespath $filterspath 17 | 18 | exec start-stop-daemon --start \ 19 | --chuid proxy:proxy \ 20 | --make-pidfile \ 21 | --pidfile /var/run/splash.pid \ 22 | --exec /usr/bin/python -- \ 23 | -m splash.server \ 24 | --maxrss $maxrss \ 25 | --logfile $logdir/splash.log \ 26 | --proxy-profiles-path=$proxyprofilespath \ 27 | --js-profiles-path=$jsprofilespath \ 28 | --filters-path=$filterspath \ 29 | --disable-lua \ 30 | ${SPLASH_OPTS} \ 31 | >$logdir/splash.out 2>$logdir/splash.err 32 | end script 33 | -------------------------------------------------------------------------------- /dockerfiles/splash-jupyter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM scrapinghub/splash:master 2 | # XXX: after each release a new branch named X.Y should be created, 3 | # and FROM should be changed to FROM scrapinghub/splash:X.Y 4 | 5 | USER root:root 6 | RUN apt-get update -q && \ 7 | apt-get install --no-install-recommends -y \ 8 | libzmq3-dev \ 9 | libsqlite3-0 \ 10 | libssl1.0-dev \ 11 | python3-dev \ 12 | build-essential \ 13 | python3-cryptography \ 14 | python3-openssl \ 15 | libsqlite3-dev 16 | 17 | # ADD . 
/app 18 | RUN pip3 install -r /app/requirements-jupyter.txt 19 | # RUN pip3 freeze 20 | RUN mkdir /notebooks & chown splash:splash /notebooks 21 | USER splash:splash 22 | 23 | RUN python3 -m splash.kernel install && \ 24 | echo '#!/bin/bash\nSPLASH_ARGS="$@" jupyter notebook --allow-root --no-browser --NotebookApp.iopub_data_rate_limit=10000000000 --port=8888 --ip=0.0.0.0' > /app/start-notebook.sh && \ 25 | chmod +x /app/start-notebook.sh 26 | 27 | VOLUME /notebooks 28 | WORKDIR /notebooks 29 | 30 | EXPOSE 8888 31 | ENTRYPOINT ["/app/start-notebook.sh"] 32 | -------------------------------------------------------------------------------- /dockerfiles/splash/build-qtwebkit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | mkdir -p /tmp/builds/qtwebkit && \ 4 | cd /tmp/builds && \ 5 | tar xvfJ "$1" --keep-newer-files --strip-components 1 && \ 6 | mkdir build && \ 7 | cd build && \ 8 | cmake -G Ninja -DPORT=Qt -DCMAKE_BUILD_TYPE=Release .. && \ 9 | ninja && \ 10 | ninja install -------------------------------------------------------------------------------- /dockerfiles/splash/create-package.sh: -------------------------------------------------------------------------------- 1 | description=' 2 | Packages from https://github.com/qtwebkit/qtwebkit/releases are used for 3 | building new splash images. It is problematic to create compatible packages 4 | using only CPack(CMake packaging module). This script creates packages 5 | compatible with qtwebkit releases. 6 | It has next arguments: 7 | - install_manifest_txt - file created by CMake after running install command, e.g. 
8 | ninja install 9 | ' 10 | install_manifest_txt=$1 11 | 12 | qt_root=`qmake -query QT_INSTALL_PREFIX` 13 | 14 | prefix_len=${#qt_root} 15 | 16 | work_dir=`mktemp -d` 17 | trap "rm -rf $work_dir" EXIT 18 | 19 | cat $install_manifest_txt | 20 | while read -r source; 21 | do 22 | target_file=$work_dir/`echo "$source" | cut -c $((prefix_len + 1))-` 23 | placement_dir=`dirname $target_file` 24 | mkdir -p $placement_dir 25 | cp $source $target_file 26 | done 27 | 28 | result_dir=`dirname $install_manifest_txt` 29 | result=`realpath $result_dir/build.7z` 30 | rm -rf $result 31 | 32 | pushd $work_dir 33 | 7z a $result ./* 34 | popd 35 | -------------------------------------------------------------------------------- /dockerfiles/splash/download-pyqt5.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # XXX: riverbankcomputing URLs need to be replaced with files.pythonhosted.org in future, 3 | # because riverbank tend to remove old releases. 
4 | SIP="https://pkgrepo.scrapinghub.com/oss/src/sip-4.19.22.tar.gz" 5 | PYQT="https://files.pythonhosted.org/packages/4d/81/b9a66a28fb9a7bbeb60e266f06ebc4703e7e42b99e3609bf1b58ddd232b9/PyQt5-5.14.2.tar.gz" 6 | WEBENGINE="https://files.pythonhosted.org/packages/47/9f/60e630711fd1dd14ef3bd95c86c733c86b8c0853749c7a03691f681f13fd/PyQtWebEngine-5.14.0.tar.gz" 7 | 8 | curl --fail -L -o "$1" ${SIP} && \ 9 | curl --fail -L -o "$2" ${PYQT} && \ 10 | curl --fail -L -o "$3" ${WEBENGINE} 11 | -------------------------------------------------------------------------------- /dockerfiles/splash/download-qt-installer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | URL="https://download.qt.io/archive/qt/$QT_SHORT_VERSION/$QT_FULL_VERSION/qt-opensource-linux-x64-$QT_FULL_VERSION.run" 4 | 5 | curl --fail -L -o "$1" ${URL} 6 | -------------------------------------------------------------------------------- /dockerfiles/splash/download-qtwebkit-source.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | URL="https://github.com/qtwebkit/qtwebkit/releases/download/qtwebkit-5.212.0-alpha3/qtwebkit-5.212.0-alpha3.tar.xz" 3 | curl --fail -L -o "$1" ${URL} 4 | -------------------------------------------------------------------------------- /dockerfiles/splash/download-qtwebkit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | curl --fail -L -o "$1" ${WEBKIT_URL} 3 | -------------------------------------------------------------------------------- /dockerfiles/splash/install-flash.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | apt-get update -q && \ 3 | apt-get install -y flashplugin-installer 4 | -------------------------------------------------------------------------------- /dockerfiles/splash/install-fonts.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | install_msfonts() { 4 | # Agree with EULA and install Microsoft fonts 5 | # apt-add-repository -y "deb http://archive.ubuntu.com/ubuntu xenial multiverse" && \ 6 | # apt-add-repository -y "deb http://archive.ubuntu.com/ubuntu xenial-updates multiverse" && \ 7 | # apt-get update && \ 8 | echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections && \ 9 | apt-get install --no-install-recommends -y ttf-mscorefonts-installer 10 | } 11 | 12 | install_extra_fonts() { 13 | # Install extra fonts (Chinese and other) 14 | apt-get install --no-install-recommends -y \ 15 | fonts-liberation \ 16 | ttf-wqy-zenhei \ 17 | fonts-arphic-gbsn00lp \ 18 | fonts-arphic-bsmi00lp \ 19 | fonts-arphic-gkai00mp \ 20 | fonts-arphic-bkai00mp \ 21 | fonts-beng 22 | } 23 | apt-get update -q && \ 24 | install_msfonts && install_extra_fonts -------------------------------------------------------------------------------- /dockerfiles/splash/install-pyqt5.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | _PYTHON=python3 4 | 5 | mkdir -p /tmp/builds/sip && \ 6 | mkdir -p /tmp/builds/pyqt5 && \ 7 | pushd /tmp/builds && \ 8 | # sip 9 | tar xzf "$1" --keep-newer-files -C sip --strip-components 1 && \ 10 | pushd sip && \ 11 | ${_PYTHON} configure.py --sip-module PyQt5.sip && \ 12 | make -j ${SPLASH_BUILD_PARALLEL_JOBS} && \ 13 | make install && \ 14 | popd && \ 15 | # PyQt5 16 | tar xzf "$2" --keep-newer-files -C pyqt5 --strip-components 1 && \ 17 | pushd pyqt5 && \ 18 | ${_PYTHON} configure.py -c -j ${SPLASH_BUILD_PARALLEL_JOBS} \ 19 | --verbose \ 20 | --confirm-license \ 21 | --no-designer-plugin \ 22 | --no-qml-plugin \ 23 | --no-python-dbus \ 24 | -e QtCore \ 25 | -e QtGui \ 26 | -e QtWidgets \ 27 | -e QtNetwork \ 28 | -e QtWebKit \ 29 | -e QtWebKitWidgets \ 30 | -e 
QtWebChannel \ 31 | -e QtSvg \ 32 | -e QtQuick \ 33 | -e QtPrintSupport && \ 34 | make -j ${SPLASH_BUILD_PARALLEL_JOBS} && \ 35 | make install && \ 36 | popd && \ 37 | ${_PYTHON} -c "import PyQt5.QtCore; print(PyQt5.QtCore.__file__)" 38 | 39 | # Builds Complete 40 | popd 41 | -------------------------------------------------------------------------------- /dockerfiles/splash/install-pyqtwebengine.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | _PYTHON=python3 4 | 5 | mkdir -p /tmp/builds/webengine && \ 6 | pushd /tmp/builds && \ 7 | # PyQtWebEngine 8 | tar xzf "$1" --keep-newer-files -C webengine --strip-components 1 && \ 9 | pushd webengine && \ 10 | ${_PYTHON} configure.py -c -j ${SPLASH_BUILD_PARALLEL_JOBS} && \ 11 | make -j ${SPLASH_BUILD_PARALLEL_JOBS} && \ 12 | make install && \ 13 | popd && \ 14 | popd 15 | -------------------------------------------------------------------------------- /dockerfiles/splash/install-python-splash-deps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | _PYTHON=python3 3 | 4 | install_python_deps () { 5 | # Install python-level dependencies. 
6 | ${_PYTHON} -m pip install -U pip setuptools==57.5.0 six && \ 7 | ${_PYTHON} -m pip install \ 8 | qt5reactor==0.5 \ 9 | psutil==5.0.0 \ 10 | "Twisted[http2]==19.7.0" \ 11 | adblockparser==0.7 \ 12 | xvfbwrapper==0.2.9 \ 13 | funcparserlib==0.3.6 \ 14 | Pillow==5.4.1 \ 15 | attrs==18.2.0 \ 16 | lupa==1.3 && \ 17 | ${_PYTHON} -m pip install https://github.com/sunu/pyre2/archive/c610be52c3b5379b257d56fc0669d022fd70082a.zip#egg=re2 18 | } 19 | 20 | install_python_deps 21 | -------------------------------------------------------------------------------- /dockerfiles/splash/install-qtwebengine-deps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Lists are from https://wiki.qt.io/QtWebEngine/How_to_Try, 4 | # but non-development versions, and with some packages removed 5 | apt-get update -q && \ 6 | apt-get install -y --no-install-recommends \ 7 | libasound2 \ 8 | libbz2-dev \ 9 | libcap-dev \ 10 | libcups2 \ 11 | libdrm-dev \ 12 | libegl1-mesa \ 13 | libgcrypt11-dev \ 14 | libnss3 \ 15 | libpci-dev \ 16 | libpulse-dev \ 17 | libudev-dev \ 18 | libxtst-dev && \ 19 | 20 | apt-get install -y --no-install-recommends \ 21 | openssl1.0 \ 22 | libssl1.0-dev \ 23 | libxcursor-dev \ 24 | libxcomposite-dev \ 25 | libxdamage-dev \ 26 | libxrandr-dev \ 27 | libfontconfig1 \ 28 | libxss-dev \ 29 | libsrtp0 \ 30 | libwebp-dev \ 31 | libjsoncpp-dev \ 32 | libopus-dev \ 33 | libminizip-dev \ 34 | libavutil-dev \ 35 | libavformat-dev \ 36 | libavcodec-dev \ 37 | libevent-dev 38 | -------------------------------------------------------------------------------- /dockerfiles/splash/install-qtwebkit-build-deps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | apt-get update -q && \ 3 | apt-get install -y --no-install-recommends \ 4 | cmake \ 5 | ninja-build \ 6 | bison \ 7 | gperf \ 8 | ruby \ 9 | python 10 | 
-------------------------------------------------------------------------------- /dockerfiles/splash/install-qtwebkit-deps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | apt-get update -q && \ 3 | apt-get install -y --no-install-recommends \ 4 | libssl1.0-dev \ 5 | libjpeg-turbo8-dev \ 6 | libgl1-mesa-dev \ 7 | libglu1-mesa-dev \ 8 | mesa-common-dev \ 9 | libfontconfig1-dev \ 10 | libicu-dev \ 11 | libpng-dev \ 12 | libxslt1-dev \ 13 | libxml2-dev \ 14 | libhyphen-dev \ 15 | libgbm1 \ 16 | libxcb-image0 \ 17 | libxcb-icccm4 \ 18 | libxcb-keysyms1 \ 19 | libxcb-render-util0 \ 20 | libxkbcommon-x11-0 \ 21 | libxi6 \ 22 | libxcomposite-dev \ 23 | libxrender-dev \ 24 | libgstreamer1.0-dev \ 25 | libgstreamer-plugins-base1.0-dev \ 26 | libgstreamer-plugins-good1.0-dev \ 27 | gstreamer1.0-plugins-good \ 28 | gstreamer1.0-x \ 29 | gstreamer1.0-libav \ 30 | webp \ 31 | rsync 32 | -------------------------------------------------------------------------------- /dockerfiles/splash/install-qtwebkit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | apt-get update -q && \ 3 | apt-get install -y p7zip-full && \ 4 | mkdir -p /tmp/builds/qtwebkit && \ 5 | cd /tmp/builds/qtwebkit && \ 6 | cp "$1" ./webkit.7z && \ 7 | 7z x ./webkit.7z -xr!*.debug && \ 8 | rm webkit.7z && \ 9 | rsync \ 10 | -aP /tmp/builds/qtwebkit/* \ 11 | $(qmake -query QT_INSTALL_PREFIX) 12 | -------------------------------------------------------------------------------- /dockerfiles/splash/install-system-deps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Install system dependencies for Qt 4 | apt-get update -q && \ 5 | apt-get install -y --no-install-recommends \ 6 | xvfb \ 7 | build-essential \ 8 | libsqlite3-dev \ 9 | zlib1g \ 10 | zlib1g-dev \ 11 | netbase \ 12 | ca-certificates \ 13 | pkg-config 
-------------------------------------------------------------------------------- /dockerfiles/splash/install-system-splash-deps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Install system dependencies for Qt, Python packages, etc. 4 | # ppa:pi-rho/security is a repo for libre2-dev 5 | add-apt-repository -y ppa:pi-rho/security && \ 6 | apt-get update -q && \ 7 | apt-get install -y --no-install-recommends \ 8 | python3 \ 9 | python3-dev \ 10 | python3-pip \ 11 | libre2-dev \ 12 | liblua5.2-dev \ 13 | libsqlite3-dev \ 14 | zlib1g \ 15 | zlib1g-dev \ 16 | netbase \ 17 | ca-certificates \ 18 | pkg-config 19 | -------------------------------------------------------------------------------- /dockerfiles/splash/prepare-install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Prepare docker image for installation of packages, docker images are 3 | # usually stripped and apt-get doesn't work immediately. 4 | # 5 | # python-software-properties contains "add-apt-repository" command for PPA conf 6 | sed 's/main$/main universe/' -i /etc/apt/sources.list && \ 7 | apt-get update -q && \ 8 | apt-get install -y --no-install-recommends \ 9 | curl \ 10 | software-properties-common \ 11 | apt-transport-https \ 12 | python3-software-properties 13 | -------------------------------------------------------------------------------- /dockerfiles/splash/qt-installer-noninteractive.qs: -------------------------------------------------------------------------------- 1 | // Installer script for qt. 
2 | // Based on https://github.com/rabits/dockerfiles/blob/master/5.13-desktop/extract-qt-installer.sh 3 | // See https://doc.qt.io/qtinstallerframework/noninteractive.html 4 | 5 | function Controller() { 6 | //installer.autoRejectMessageBoxes(); 7 | installer.installationFinished.connect(function() { 8 | gui.clickButton(buttons.NextButton); 9 | }) 10 | } 11 | 12 | Controller.prototype.WelcomePageCallback = function() { 13 | console.log("Welcome Page"); 14 | gui.clickButton(buttons.NextButton, 3000); 15 | } 16 | 17 | Controller.prototype.CredentialsPageCallback = function() { 18 | gui.clickButton(buttons.CommitButton); 19 | } 20 | 21 | Controller.prototype.IntroductionPageCallback = function() { 22 | gui.clickButton(buttons.NextButton); 23 | } 24 | 25 | Controller.prototype.TargetDirectoryPageCallback = function() 26 | { 27 | var short_version = installer.environmentVariable("QT_SHORT_VERSION"); 28 | var path = "/opt/qt-" + short_version 29 | 30 | gui.currentPageWidget().TargetDirectoryLineEdit.setText(path); 31 | gui.clickButton(buttons.NextButton); 32 | } 33 | 34 | Controller.prototype.ComponentSelectionPageCallback = function() { 35 | var major_version = installer.environmentVariable("QT_MAJOR_VERSION"); 36 | var minor_version = installer.environmentVariable("QT_MINOR_VERSION"); 37 | var patch_version = installer.environmentVariable("QT_PATCH_VERSION"); 38 | var qt_version = "qt.qt" + major_version + "." 
+ major_version + minor_version + patch_version; 39 | 40 | var components = [ 41 | qt_version+ ".gcc_64", 42 | qt_version+ ".qtwebengine", 43 | qt_version+ ".qtnetworkauth", 44 | ] 45 | console.log("Select components"); 46 | var widget = gui.currentPageWidget(); 47 | for (var i=0; i < components.length; i++){ 48 | widget.selectComponent(components[i]); 49 | console.log("selected: " + components[i]) 50 | } 51 | gui.clickButton(buttons.NextButton); 52 | } 53 | 54 | Controller.prototype.LicenseAgreementPageCallback = function() { 55 | console.log("Accept license agreement"); 56 | var widget = gui.currentPageWidget(); 57 | if (widget != null) { 58 | widget.AcceptLicenseRadioButton.setChecked(true); 59 | } 60 | gui.clickButton(buttons.NextButton); 61 | } 62 | 63 | Controller.prototype.ReadyForInstallationPageCallback = function() { 64 | console.log("Ready to install"); 65 | gui.clickButton(buttons.CommitButton); 66 | } 67 | 68 | Controller.prototype.FinishedPageCallback = function() { 69 | var widget = gui.currentPageWidget(); 70 | if (widget.LaunchQtCreatorCheckBoxForm) { 71 | widget.LaunchQtCreatorCheckBoxForm.launchQtCreatorCheckBox.setChecked(false); 72 | } 73 | gui.clickButton(buttons.FinishButton); 74 | } 75 | -------------------------------------------------------------------------------- /dockerfiles/splash/run-qt-installer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # XXX: if qt version is changed, Dockerfile should be updated, 3 | # as well as qt-installer-noninteractive.qs script. 
4 | 5 | if [ -z ${DISPLAY+x} ]; then 6 | command="xvfb-run $1" 7 | else 8 | command="$1" 9 | fi 10 | 11 | chmod +x "$1" && \ 12 | http_proxy="http://localhost:8080" https_proxy="http://localhost:8080" $command --script "$2" \ 13 | | egrep -v '\[[0-9]+\] Warning: (Unsupported screen format)|((QPainter|QWidget))' && \ 14 | ls /opt/qt-$QT_SHORT_VERSION/ && \ 15 | # cat /opt/qt-$QT_SHORT_VERSION/InstallationLog.txt && \ 16 | cat /opt/qt-$QT_SHORT_VERSION/components.xml 17 | -------------------------------------------------------------------------------- /dockerfiles/tests/Dockerfile: -------------------------------------------------------------------------------- 1 | # Docker file for running Splash tests. 2 | # It needs a base image named "splash"; 3 | # build it by running ``docker build -t splash .`` from Splash 4 | # source checkout. 5 | # 6 | # XXX: in future it should be possible to base this image on 7 | # scrapinghub/splash:master. 8 | FROM splash 9 | 10 | USER root:root 11 | RUN apt-get update -q && \ 12 | apt-get install --no-install-recommends -y \ 13 | libzmq3-dev \ 14 | libsqlite3-0 \ 15 | libssl1.0-dev \ 16 | python3-dev \ 17 | build-essential \ 18 | python3-cryptography \ 19 | python3-openssl \ 20 | libsqlite3-dev \ 21 | git 22 | 23 | # ADD . /app 24 | RUN pip3 install -r /app/requirements-dev.txt 25 | RUN pip3 install -U pytest-cov coverage codecov pytest-xdist 26 | 27 | ADD . /app 28 | RUN pip3 install /app 29 | 30 | WORKDIR /app 31 | RUN find . 
-name \*.pyc -delete 32 | 33 | RUN chown -R splash:splash /app 34 | USER splash:splash 35 | ENTRYPOINT ["/app/dockerfiles/tests/runtests.sh"] 36 | CMD ["splash"] -------------------------------------------------------------------------------- /dockerfiles/tests/runtests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | py.test --cov=splash --doctest-modules --durations=50 "$@" 3 | -------------------------------------------------------------------------------- /docs/changes.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CHANGES.rst 2 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing to Splash 2 | ====================== 3 | 4 | Splash is free & open source. 5 | Development happens at GitHub: https://github.com/scrapinghub/splash 6 | 7 | Testing Suite 8 | ------------- 9 | 10 | .. image:: https://secure.travis-ci.org/scrapinghub/splash.png?branch=master 11 | :target: http://travis-ci.org/scrapinghub/splash 12 | 13 | The recommended way to execute Splash testing suite is to use a special 14 | testing Docker container. 15 | 16 | 1. First, create a base Splash image named "splash". If you're not 17 | customizing Splash dependencies, and your changes are based on Splash 18 | master branch, you can use ``scrapinghub/splash:master`` image:: 19 | 20 | docker pull scrapinghub/splash:master 21 | docker tag scrapinghub/splash:master splash 22 | 23 | If you've changed Splash dependencies (Python-level or system-level) 24 | then you have to build Splash image from scratch. Run the following 25 | command from the source checkout:: 26 | 27 | docker build -t splash . 28 | 29 | It can take a while (maybe half an hour). 
30 | Alternatively, you can temporarily change ``dockerfiles/tests/Dockerfile`` 31 | or ``setup.py`` to install new dependencies. 32 | 33 | 2. Create a testing Docker image:: 34 | 35 | docker build -t splash-tests -f dockerfiles/tests/Dockerfile . 36 | 37 | Testing Docker image is based on ``splash`` docker image, so you need to 38 | have an image called ``splash`` - we created such image at step (1). 39 | 40 | 3. Run tests inside this testing image:: 41 | 42 | docker run --rm -it splash-tests 43 | 44 | You can also pass pytest command-line arguments in the command above. 45 | For example, you can select only a subset of tests to execute 46 | (SandboxTest test case in this example):: 47 | 48 | docker run --rm -it splash-tests -k SandboxTest 49 | 50 | If you've changed Splash source code and want to re-run tests, repeat steps 51 | (2) and (3). Step (2) should take much less time now. 52 | Repeating step (1) is only necessary if you're adding new 53 | dependencies to Splash (Python or system-level), or if you want to update 54 | the base Splash image (e.g. after a recent rebase on Splash master). 55 | 56 | There is a script in the root of Splash repository 57 | (``runtests-docker.sh``) which combines steps (2) and (3); you can use it 58 | during development to run tests: change Splash source code or testing source 59 | code, then run ``./runtests-docker.sh`` from source checkout. 60 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ======================================= 2 | Splash - A javascript rendering service 3 | ======================================= 4 | 5 | Splash is a javascript rendering service. It's a lightweight web browser 6 | with an HTTP API, implemented in Python 3 using Twisted and QT5. 
The (twisted) 7 | QT reactor is used to make the service fully asynchronous allowing 8 | to take advantage of webkit concurrency via QT main loop. Some of Splash 9 | features: 10 | 11 | * process multiple webpages in parallel; 12 | * get HTML results and/or take screenshots; 13 | * turn OFF images or use Adblock Plus rules to make rendering faster; 14 | * execute custom JavaScript in page context; 15 | * write Lua browsing :ref:`scripts `; 16 | * develop Splash Lua scripts in :ref:`Splash-Jupyter ` 17 | Notebooks. 18 | * get detailed rendering info in HAR format. 19 | 20 | 21 | Documentation 22 | ============= 23 | 24 | .. toctree:: 25 | :maxdepth: 2 26 | 27 | install 28 | api 29 | scripting-tutorial 30 | scripting-overview 31 | scripting-ref 32 | scripting-response-object 33 | scripting-request-object 34 | scripting-element-object 35 | scripting-binary-data 36 | scripting-libs 37 | kernel 38 | faq 39 | contributing 40 | internals/index 41 | changes 42 | 43 | -------------------------------------------------------------------------------- /docs/internals/index.rst: -------------------------------------------------------------------------------- 1 | Implementation Details 2 | ====================== 3 | 4 | This section contains information useful if you want to understand 5 | Splash codebase. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | js-python-lua 11 | -------------------------------------------------------------------------------- /docs/internals/js-python-lua.rst: -------------------------------------------------------------------------------- 1 | JavaScript <-> Python <-> Lua integration 2 | ========================================= 3 | 4 | Lua and JavaScript are not connected directly; they communicate through Python. 5 | 6 | Python <-> Lua is handled using lupa library. 7 | :func:`splash.qtrender_lua.command` decorator handles most of Python <-> Lua 8 | integration. 9 | 10 | Python <-> JavaScript is handled using custom serialization code.
11 | QT host objects are not used (with a few exceptions). Instead of this 12 | JavaScript results are sanitized and processed in Python; 13 | Python results are encoded to JSON and decoded/processed 14 | in JavaScript. 15 | 16 | Python -> Lua 17 | ------------- 18 | 19 | Data is converted from Python to Lua in two cases: 20 | 21 | 1. method of an exposed Python object returns a result 22 | (most common example is a method of ``splash`` Lua object); 23 | 2. Python code calls Lua function with arguments - it could be e.g. 24 | an on_request callback. 25 | 26 | Conversion rules: 27 | 28 | * Basic Python types are converted to Lua: strings -> Lua strings, 29 | lists and dicts -> Lua tables, numbers -> Lua numbers, None -> nil(?). 30 | 31 | This is handled using :meth:`splash.lua_runtime.SplashLuaRuntime.python2lua` 32 | method. For attributes exposed to Lua this method is called manually; 33 | for return results of Python functions / methods it is handled by 34 | :func:`splash.qtrender_lua.emits_lua_objects` decorator. Methods decorated 35 | with ``@command`` use ``splash.qtrender_lua.emits_lua_objects`` internally, 36 | so a Python method decorated with ``@command`` decorator may return Python 37 | result in its body, and the final result would be a Lua object. 38 | 39 | * If there is a need to expose a custom Python object to Lua then 40 | a subclass of :class:`splash.qtrender_lua.BaseExposedObject` is used; it is 41 | wrapped to a Lua table using utilities from wraputils.lua. 42 | Lua table exposes allowlisted attributes and methods of the object 43 | using metatable, and disallows access to all other attributes. 44 | 45 | * Other than that, there is no automatic conversion. If something is not 46 | converted then it is available for Lua as an opaque userdata object; 47 | access to methods and attributes is disabled by a sandbox. 48 | 49 | * To prevent wrapping method may return :class:`splash.lua.PyResult` instance. 
50 | 51 | 52 | Lua -> Python 53 | ------------- 54 | 55 | Lua -> Python conversion is needed in two cases: 56 | 57 | 1. Lua code calls Python code, passing some arguments; 58 | 2. Python code calls Lua code and wants a result back. 59 | 60 | * Basic Lua types are converted to Python using 61 | :meth:`splash.lua_runtime.SplashLuaRuntime.lua2python`. For method arguments 62 | lua2python is called by :func:`splash.qtrender_lua.decodes_lua_arguments` 63 | decorator; ``@command`` decorator uses ``decodes_lua_arguments`` internally. 64 | 65 | * Python objects which were exposed to Lua (BaseExposedObject subclasses) 66 | are **not** converted back. By default they raise an error; 67 | with decode_arguments=False they are available as opaque 68 | Lua (lupa) table objects. 69 | 70 | :func:`splash.qtrender_lua.is_wrapped_exposed_object` can be used to check 71 | if a lupa object is a wrapped BaseExposedObject instance; obj.unwrapped() 72 | method can be used to access the underlying Python object. 73 | 74 | 75 | JavaScript -> Python 76 | -------------------- 77 | 78 | To get results from JavaScript to Python they are converted to primitive 79 | JSON-serializable types first. QtWebKit host objects are not used. 80 | Objects of unknown JavaScript types are discarded; max depth of result 81 | is limited. 82 | 83 | JavaScript -> Python conversion utilities reside in 84 | 85 | * :mod:`splash.jsutils` module - JavaScript side, i.e. sanitizing and encoding; 86 | two main functions are ``SANITIZE_FUNC_JS`` and ``STORE_DOM_ELEMENTS_JS``; 87 | * :meth:`splash.browser_tab.BrowserTab.evaljs` method - Python side, 88 | i.e. decoding of the result. 89 | 90 | For most types (objects, arrays, numbers, strings) conversion method 91 | is straightforward; the most tricky case is a reference to DOM nodes. 92 | 93 | For top-level DOM nodes (i.e. a result is a DOM node or a NodeList) 94 | a node is stored in a special window attribute, and generated id is returned 95 | to Python instead.
All other DOM nodes are discarded - returning a Node 96 | or a NodeList as a part of data structure is not supported at the moment. 97 | ``STORE_DOM_ELEMENTS_JS`` processes Node and NodeList objects; 98 | ``SANITIZE_FUNC_JS`` sanitizes the result (handles all other data types, 99 | drops unsupported data). 100 | 101 | In Python HTMLElement objects are created for DOM nodes; they contain node_id 102 | attribute with id returned by JavaScript; it allows to fetch the real Node 103 | object in JavaScript. This is handled by 104 | :meth:`splash.browser_tab.BrowserTab.evaljs`. 105 | 106 | Python -> JavaScript 107 | -------------------- 108 | 109 | There are two cases Python objects are converted to JavaScript objects: 110 | 111 | 1. functions created with splash:jsfunc() are called with arguments; 112 | 2. methods of HtmlElement which wrap JS functions are called with arguments. 113 | 114 | The conversion is handled either by :func:`splash.html_element.escape_js_args` 115 | or by :func:`splash.jsutils.escape_js`. 116 | 117 | * ``escape_js`` just encodes Python data to JSON and removes quotes; the result 118 | can be used as literal representation of argument values, i.e. added to 119 | a JS function call using string formatting. 120 | * ``escape_js_args`` is similar to ``escape_js``, but it handles 121 | ``splash.html_element.HTMLElement`` instances by replacing them with JS 122 | code to access stored nodes. 123 | -------------------------------------------------------------------------------- /docs/kernel.rst: -------------------------------------------------------------------------------- 1 | .. _splash-jupyter: 2 | 3 | Splash and Jupyter 4 | ================== 5 | 6 | Splash provides a custom Jupyter_ (previously known as IPython_) kernel for Lua. 
7 | Together with Jupyter notebook_ frontend it forms an interactive 8 | web-based development environment for Splash Scripts with syntax highlighting, 9 | smart code completion, context-aware help, inline images support and a real 10 | live WebKit browser window with Web Inspector enabled, controllable from 11 | a notebook. 12 | 13 | Installation 14 | ------------ 15 | 16 | To install Splash-Jupyter using Docker, run:: 17 | 18 | $ docker pull scrapinghub/splash-jupyter 19 | 20 | Then start the container:: 21 | 22 | $ docker run -p 8888:8888 -it scrapinghub/splash-jupyter 23 | 24 | .. note:: 25 | 26 | Without ``-it`` flags you won't be able to stop the container using Ctrl-C. 27 | 28 | This command should print something like this:: 29 | 30 | Copy/paste this URL into your browser when you connect for the first time, 31 | to login with a token: 32 | http://localhost:8888/?token=e2435ae336d22b23d5e868d03ce728bc33e73b6159e391ba 33 | 34 | To view Jupyter, open the suggested location in a browser. 35 | It should display the usual Jupyter Notebook overview page. 36 | 37 | .. note:: 38 | 39 | In older Docker setups (e.g. with boot2docker_ on OS X) you may have 40 | to replace 'localhost' with the IP address Docker is available on, 41 | e.g. a result of ``boot2docker ip`` in case of boot2docker or 42 | ``docker-machine ip `` in case of docker-machine_. 43 | 44 | Click "New" button and choose "Splash" in the drop-down list - Splash Notebook 45 | should open. 46 | 47 | Splash Notebook looks like an IPython notebook or other Jupyter-based 48 | notebooks; it allows to run and develop Splash Lua scripts interactively. 49 | For example, try entering ``splash:go("your-favorite-website")`` in a cell, 50 | execute it, then enter ``splash:png()`` in the next cell and run it 51 | as well - you should get a screenshot of the website displayed inline.
52 | 53 | Persistence 54 | ----------- 55 | 56 | By default, notebooks are stored in a Docker container; they are destroyed 57 | when you restart an image. To persist notebooks you can mount a local folder 58 | to ``/notebooks``. For example, let's use current folder to store the 59 | notebooks:: 60 | 61 | $ docker run -v `/bin/pwd`/notebooks:/notebooks -p 8888:8888 -it splash-jupyter 62 | 63 | 64 | Live Webkit window 65 | ------------------ 66 | 67 | To view Live Webkit window with web inspector when Splash-Jupyter is executed 68 | from Docker, you will need to pass additional docker parameters to share the 69 | host system's X server with the docker container. 70 | 71 | First you need to allow docker to connect to the X server, using:: 72 | 73 | $ xhost +local:docker 74 | 75 | And then use the ``--disable-xvfb`` 76 | command line flag to run Splash-Jupyter:: 77 | 78 | $ docker run -e DISPLAY=unix$DISPLAY \ 79 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 80 | -v $XAUTHORITY:$XAUTHORITY \ 81 | -e XAUTHORITY=$XAUTHORITY \ 82 | -p 8888:8888 \ 83 | -it scrapinghub/splash-jupyter --disable-xvfb 84 | 85 | .. note:: 86 | 87 | The command above is tested on Linux. 88 | 89 | On Windows you can `use VcXsrv `_:: 90 | 91 | $ docker run -e DISPLAY=$DISPLAY ^ 92 | -p 8888:8888 ^ 93 | -it scrapinghub/splash-jupyter --disable-xvfb 94 | 95 | From Notebook to HTTP API 96 | ------------------------- 97 | 98 | After you finished developing the script using Splash Notebook, 99 | you may want to convert it to a form suitable for submitting 100 | to Splash HTTP API (see :ref:`execute` and :ref:`run`). 101 | 102 | To do that, copy-paste (or download using "File -> Download as -> .lua") 103 | all relevant code. For :ref:`run` endpoint add ``return`` statement to 104 | return the final result: 105 | 106 | .. code-block:: lua 107 | 108 | -- Script code goes here, 109 | -- including all helper functions.
110 | return {...} -- return the result 111 | 112 | For :ref:`execute` add ``return`` statement and put the code 113 | inside ``function main(splash)``: 114 | 115 | .. code-block:: lua 116 | 117 | function main(splash) 118 | -- Script code goes here, 119 | -- including all helper functions. 120 | return {...} -- return the result 121 | end 122 | 123 | To make the script more generic you can use :ref:`splash-args` instead of 124 | hardcoded constants (e.g. for page urls). Also, consider submitting several 125 | requests with different arguments instead of running a loop in a script 126 | if you need to visit and process several pages - it is an easy way 127 | to parallelize the work. 128 | 129 | There are some gotchas: 130 | 131 | 1. When you run a notebook cell and then run another notebook cell there 132 | is a delay between runs; the effect is similar to inserting 133 | :ref:`splash-wait` calls at the beginning of each cell. 134 | 2. Regardless of :ref:`sandbox ` settings, scripts in Jupyter 135 | notebook are **not** sandboxed. Usually it is not a problem, 136 | but some functions may be unavailable in HTTP API if sandbox is enabled. 137 | 138 | .. _IPython: http://ipython.org/ 139 | .. _Jupyter: http://jupyter.org/ 140 | .. _notebook: http://ipython.org/notebook.html 141 | .. _Docker: http://docker.io 142 | .. _Boot2Docker: http://boot2docker.io/ 143 | .. _docker-machine: https://docs.docker.com/machine/ 144 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-rtd-theme 2 | xvfbwrapper 3 | -------------------------------------------------------------------------------- /docs/scripting-binary-data.rst: -------------------------------------------------------------------------------- 1 | .. 
_binary-data: 2 | 3 | Working with Binary Data 4 | ======================== 5 | 6 | Motivation 7 | ---------- 8 | 9 | Splash assumes that most strings in a script are encoded to UTF-8. 10 | This is true for HTML content - even if the original response was not UTF-8, 11 | internally browser works with UTF-8, so :ref:`splash-html` result is always 12 | UTF-8. 13 | 14 | When you return a Lua table from the ``main`` function Splash encodes it 15 | to JSON; JSON is a text protocol which can't handle arbitrary binary data, 16 | so Splash assumes all strings are UTF-8 when returning a JSON result. 17 | 18 | But sometimes it is necessary to work with binary data: for example, 19 | it could be raw image data returned by :ref:`splash-png` or a response 20 | body of a non-UTF-8 page returned by :ref:`splash-http-get`. 21 | 22 | .. _binary-objects: 23 | 24 | Binary Objects 25 | -------------- 26 | 27 | To pass non-UTF8 data to Splash (returning it as a result of ``main`` or 28 | passing as arguments to ``splash`` methods) a script may mark it as 29 | a binary object using :ref:`treat-as-binary` function. 30 | 31 | Some of the Splash functions already return binary objects: :ref:`splash-png`, 32 | :ref:`splash-jpeg`; :ref:`splash-response-body` attribute is also 33 | a binary object. 34 | 35 | A binary object can be returned as a ``main`` result directly. 36 | It is the reason the following example works 37 | (a basic :ref:`render.png` implementation in Lua): 38 | 39 | .. code-block:: lua 40 | 41 | -- basic render.png emulation 42 | function main(splash) 43 | assert(splash:go(splash.args.url)) 44 | return splash:png() 45 | end 46 | 47 | All binary objects have content-type attached. For example, :ref:`splash-png` 48 | result will have content-type ``image/png``. 49 | 50 | When returned directly, a binary object data is used as-is for the 51 | response body, and Content-Type HTTP header is set to the content-type 52 | of a binary object. 
So in the previous example the result will be a PNG image 53 | with a proper Content-Type header. 54 | 55 | To construct your own binary objects use :ref:`treat-as-binary` function. 56 | For example, let's return a 1x1px black GIF image as a response: 57 | 58 | .. code-block:: lua 59 | 60 | treat = require("treat") 61 | base64 = require("base64") 62 | 63 | function main(splash) 64 | local gif_b64 = "AQABAIAAAAAAAAAAACH5BAAAAAAALAAAAAABAAEAAAICTAEAOw==" 65 | local gif_bytes = base64.decode(gif_b64) 66 | return treat.as_binary(gif_bytes, "image/gif") 67 | end 68 | 69 | When ``main`` result is returned, binary object content-type takes a priority 70 | over a value set by :ref:`splash-set-result-content-type`. To override 71 | content-type of a binary object create another binary object with a required 72 | content-type: 73 | 74 | .. code-block:: lua 75 | 76 | local treat = require("treat") 77 | function main(splash) 78 | -- ... 79 | local img = splash:png() 80 | return treat.as_binary(img, "image/x-png") -- default was "image/png" 81 | end 82 | 83 | When a binary object is serialized to JSON it is auto-encoded to base64 84 | before serializing. For example, it may happen when a table is returned 85 | as a ``main`` function result: 86 | 87 | .. code-block:: lua 88 | 89 | function main(splash) 90 | assert(splash:go(splash.args.url)) 91 | 92 | -- result is a JSON object {"png": "...base64-encoded image data"} 93 | return {png=splash:png()} 94 | end 95 | -------------------------------------------------------------------------------- /docs/scripting-request-object.rst: -------------------------------------------------------------------------------- 1 | .. _splash-request: 2 | 3 | Request Object 4 | ============== 5 | 6 | Request objects are received by :ref:`splash-on-request` callbacks; 7 | they are also available as :ref:`response.request `. 8 | 9 | ..
_splash-request-attributes: 10 | 11 | Attributes 12 | ~~~~~~~~~~ 13 | 14 | Request objects have several attributes with information about an HTTP request. 15 | These fields are for information only; changing them doesn't change 16 | the request to be sent. 17 | 18 | .. _splash-request-url: 19 | 20 | request.url 21 | ----------- 22 | 23 | Requested URL. 24 | 25 | .. _splash-request-method: 26 | 27 | request.method 28 | -------------- 29 | 30 | HTTP method name in upper case, e.g. "GET". 31 | 32 | .. _splash-request-headers: 33 | 34 | request.headers 35 | --------------- 36 | 37 | A Lua table with request HTTP headers (header name => header value). 38 | Keys are header names (strings), values are header values (strings). 39 | 40 | Lookups are case-insensitive, so ``request.headers['content-type']`` 41 | is the same as ``request.headers['Content-Type']``. 42 | 43 | 44 | .. _splash-request-info: 45 | 46 | request.info 47 | ------------ 48 | 49 | A table with request data in `HAR request`_ format. 50 | 51 | .. _HAR request: http://www.softwareishard.com/blog/har-12-spec/#request 52 | 53 | 54 | .. _splash-request-methods: 55 | 56 | Methods 57 | ~~~~~~~ 58 | 59 | To change or drop the request before sending use one of 60 | the ``request`` methods. Note that these methods are only available 61 | before the request is sent (they have no effect if a request is already sent). 62 | Currently it means you can only use them in :ref:`splash-on-request` callbacks. 63 | 64 | .. _splash-request-abort: 65 | 66 | request:abort 67 | ------------- 68 | 69 | Drop the request. 70 | 71 | **Signature:** ``request:abort()`` 72 | 73 | **Returns:** nil. 74 | 75 | **Async:** no. 76 | 77 | .. _splash-request-enable-response-body: 78 | 79 | request:enable_response_body 80 | ---------------------------- 81 | 82 | Enable tracking of response content (i.e. :ref:`splash-response-body` 83 | attribute). 84 | 85 | **Signature:** ``request:enable_response_body()`` 86 | 87 | **Returns:** nil.
88 | 89 | **Async:** no. 90 | 91 | This function allows to enable response content tracking per-request 92 | when :ref:`splash-response-body-enabled` is set to false. 93 | Call it in a :ref:`splash-on-request` callback. 94 | 95 | .. _splash-request-set-url: 96 | 97 | request:set_url 98 | --------------- 99 | 100 | Change request URL to a specified value. 101 | 102 | **Signature:** ``request:set_url(url)`` 103 | 104 | **Parameters:** 105 | 106 | * url - new request URL 107 | 108 | **Returns:** nil. 109 | 110 | **Async:** no. 111 | 112 | 113 | .. _splash-request-set-proxy: 114 | 115 | request:set_proxy 116 | ----------------- 117 | 118 | Set a proxy server to use for this request. 119 | 120 | **Signature:** ``request:set_proxy{host, port, username=nil, password=nil, type='HTTP'}`` 121 | 122 | **Parameters:** 123 | 124 | * host 125 | * port 126 | * username 127 | * password 128 | * type - proxy type; allowed proxy types are 'HTTP' and 'SOCKS5'. 129 | 130 | **Returns:** nil. 131 | 132 | **Async:** no. 133 | 134 | Omit ``username`` and ``password`` arguments if a proxy 135 | doesn't need auth. 136 | 137 | When ``type`` is set to 'HTTP' HTTPS proxying should 138 | also work; it is implemented using CONNECT command. 139 | 140 | 141 | .. _splash-request-set-timeout: 142 | 143 | request:set_timeout 144 | ------------------- 145 | 146 | Set a timeout for this request. 147 | 148 | **Signature:** ``request:set_timeout(timeout)`` 149 | 150 | **Parameters:** 151 | 152 | * timeout - timeout value, in seconds. 153 | 154 | **Returns:** nil. 155 | 156 | **Async:** no. 157 | 158 | If response is not fully received after the timeout, 159 | request is aborted. See also: :ref:`splash-resource-timeout`. 160 | 161 | .. _splash-request-set-header: 162 | 163 | request:set_header 164 | ------------------ 165 | 166 | Set an HTTP header for this request. 
167 | 168 | **Signature:** ``request:set_header(name, value)`` 169 | 170 | **Parameters:** 171 | 172 | * name - header name; 173 | * value - header value. 174 | 175 | **Returns:** nil. 176 | 177 | **Async:** no. 178 | 179 | See also: :ref:`splash-set-custom-headers` 180 | 181 | 182 | .. _splash-request-set-http2-enabled: 183 | 184 | request:set_http2_enabled 185 | ------------------------- 186 | 187 | Enable or disable HTTP2 support for this request. 188 | 189 | **Signature:** ``request:set_http2_enabled(true/false)`` 190 | 191 | **Parameters:** 192 | 193 | * value - boolean, whether HTTP2 should be allowed for this request. 194 | 195 | **Returns:** nil. 196 | 197 | **Async:** no. 198 | -------------------------------------------------------------------------------- /docs/scripting-response-object.rst: -------------------------------------------------------------------------------- 1 | .. _splash-response: 2 | 3 | Response Object 4 | =============== 5 | 6 | Response objects are returned as a result of several Splash methods 7 | (like :ref:`splash-http-get` or :ref:`splash-http-post`); they 8 | are also passed to some of the callbacks (e.g. :ref:`splash-on-response` and 9 | :ref:`splash-on-response-headers` callbacks). These objects contain 10 | information about a response. 11 | 12 | .. _splash-response-url: 13 | 14 | response.url 15 | ------------ 16 | 17 | URL of the response. In case of redirects :ref:`splash-response-url` 18 | is the last URL. 19 | 20 | This field is read-only. 21 | 22 | .. _splash-response-status: 23 | 24 | response.status 25 | --------------- 26 | 27 | HTTP status code of the response. 28 | 29 | This field is read-only. 30 | 31 | .. _splash-response-ok: 32 | 33 | response.ok 34 | ----------- 35 | 36 | ``true`` for successful responses and ``false`` when an error happened. 37 | 38 | Example: 39 | 40 | .. code-block:: lua 41 | 42 | local reply = splash:http_get("some-bad-url") 43 | -- reply.ok == false 44 | 45 | This field is read-only.
46 | 47 | .. _splash-response-headers: 48 | 49 | response.headers 50 | ---------------- 51 | 52 | A Lua table with HTTP headers (header name => header value). 53 | Keys are header names (strings), values are header values (strings). 54 | 55 | Lookups are case-insensitive, so ``response.headers['content-type']`` 56 | is the same as ``response.headers['Content-Type']``. 57 | 58 | This field is read-only. 59 | 60 | .. _splash-response-info: 61 | 62 | response.info 63 | ------------- 64 | 65 | A Lua table with response data in `HAR response`_ format. 66 | 67 | This field is read-only. 68 | 69 | .. _HAR response: http://www.softwareishard.com/blog/har-12-spec/#response 70 | 71 | .. _splash-response-body: 72 | 73 | response.body 74 | ------------- 75 | 76 | Raw response body (a :ref:`binary object `). 77 | 78 | If you want to process response body from Lua use :ref:`treat-as-string` 79 | to convert it to a Lua string first. 80 | 81 | :ref:`splash-response-body` attribute is not available by default 82 | in :ref:`splash-on-response` callbacks; use :ref:`splash-response-body-enabled` 83 | or :ref:`splash-request-enable-response-body` to enable it. 84 | 85 | .. _splash-response-request: 86 | 87 | response.request 88 | ---------------- 89 | 90 | A corresponding :ref:`splash-request`. 91 | 92 | This field is read-only. 93 | 94 | .. _splash-response-abort: 95 | 96 | response:abort 97 | -------------- 98 | 99 | **Signature:** ``response:abort()`` 100 | 101 | **Returns:** nil. 102 | 103 | **Async:** no. 104 | 105 | Abort reading of the response body. This method is only available if 106 | a response is not read yet - currently you can use it only 107 | in a :ref:`splash-on-response-headers` callback. 
108 | 109 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # the following libraries are only required by tests 2 | pyasn1 3 | # pyasn-modules >= 0.2.4 4 | pytest >= 5.0.0 5 | service-identity >= 18.1.0 6 | pyOpenSSL >= 19.0.0 7 | cryptography >= 2.5 8 | requests >= 2.21 9 | jsonschema >= 2.0 10 | strict-rfc3339 11 | jupyter_kernel_test 12 | -r requirements-jupyter.txt -------------------------------------------------------------------------------- /requirements-jupyter.txt: -------------------------------------------------------------------------------- 1 | ipykernel==5.1.2 2 | ipython==7.7.0 3 | ipython-genutils==0.2.0 4 | ipywidgets==7.5.1 5 | jupyter==1.0.0 6 | jupyter-client==5.3.1 7 | jupyter-console==6.0.0 8 | jupyter-core==4.5.0 9 | nbconvert==5.6.0 10 | nbformat==4.4.0 11 | notebook==5.7.8 12 | tornado < 6.0 13 | widgetsnbextension==3.5.1 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # install PyQt5 (Splash is tested on PyQT 5.13) 2 | # and the following packages: 3 | twisted[http2] == 19.7.0 4 | qt5reactor 5 | psutil 6 | adblockparser >= 0.5 7 | https://github.com/sunu/pyre2/archive/c610be52c3b5379b257d56fc0669d022fd70082a.zip#egg=re2 8 | xvfbwrapper 9 | Pillow > 2.0 10 | attrs >= 18.2.0 11 | 12 | # for scripting support 13 | lupa >= 1.3 14 | funcparserlib >= 0.3.6 15 | -------------------------------------------------------------------------------- /runsplash-docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t splash-tests -f dockerfiles/tests/Dockerfile . 
&& \ 3 | docker run -it -p8050:8050 --rm --entrypoint "/app/bin/splash" splash-tests "$@" -------------------------------------------------------------------------------- /runtests-docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker build -t splash-tests -f dockerfiles/tests/Dockerfile . && \ 3 | docker run -it --rm splash-tests "$@" -------------------------------------------------------------------------------- /scripts/rst2inspections.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | This script extracts inspections info for IPython kernel from 5 | Splash reference documentation. 6 | """ 7 | from __future__ import absolute_import 8 | import os 9 | import re 10 | import json 11 | import collections 12 | 13 | 14 | def _parse_doc(doc): 15 | res = collections.OrderedDict() 16 | 17 | m = re.search(r"^splash:(\w+)\s+[-]+\s*$", doc, re.MULTILINE) 18 | res['name'] = m.group(1) if m else None 19 | 20 | header, content = re.split("[-][-]+", doc, maxsplit=1) 21 | res['header'] = header.strip() 22 | res['content'] = content.strip() 23 | 24 | m = re.search(r"((.|[\n\r])+?)\*\*Signature", content, re.MULTILINE) 25 | res['short'] = m.group(1).strip() if m else None 26 | 27 | m = re.search(r"Signature:.*``(.+)``", content) 28 | res['signature'] = m.group(1) if m else None 29 | 30 | m = re.search(r"Returns:\*\*((.|[\n\r])+?)\n\n", content, re.MULTILINE) 31 | res['returns'] = m.group(1).strip() if m else None 32 | 33 | m = re.search(r"Async:\*\*((.|[\n\r])+?)\n\n", content, re.MULTILINE) 34 | res['async'] = m.group(1).strip() if m else None 35 | 36 | m = re.search(r"(?:.|[\n\r])*:\*\*(?:.|[\n\r])+?\n\n?((?:.|[\n\r])+)", content, re.MULTILINE) 37 | res['details'] = m.group(1).strip() if m else None 38 | 39 | m = re.search(r"Parameters:\*\*((.|[\n\r])+?)\*\*Returns:", content, re.MULTILINE) 40 | res['params'] 
= m.group(1).strip() if m else None 41 | 42 | return res 43 | 44 | 45 | def parse_rst(rst_source): 46 | """ 47 | Parse Sphinx Lua splash methods reference docs and 48 | extract information useful for inspections. 49 | """ 50 | parsed = re.split(r"\.\. _splash-(.+):", rst_source)[1:] 51 | # ids = parsed[::2] 52 | docs = parsed[1::2] 53 | info = [_parse_doc(d) for d in docs] 54 | return collections.OrderedDict( 55 | (d["header"], d) 56 | for d in info 57 | ) 58 | 59 | 60 | def resolve_literalinclude(rst_source, base_dir): 61 | pattern = r"\.\. literalinclude::\s*(.+)\n\s*:language:.+\n" 62 | def repl(m): 63 | path = m.group(1) 64 | with open(os.path.join(base_dir, path), 'rb') as f: 65 | return f.read().decode('utf8') 66 | 67 | return re.sub(pattern, repl, rst_source) 68 | 69 | 70 | def rst2inspections(rst_filename, out_filename): 71 | with open(rst_filename, "rb") as f: 72 | doc = f.read().decode('utf8') 73 | doc = resolve_literalinclude(doc, os.path.dirname(rst_filename)) 74 | info = parse_rst(doc) 75 | 76 | with open(out_filename, "w") as f: 77 | json.dump(info, f, indent=2) 78 | 79 | 80 | if __name__ == '__main__': 81 | root = os.path.join(os.path.dirname(__file__), "..") 82 | rst_filename = os.path.join(root, "docs", "scripting-ref.rst") 83 | out_filename = os.path.join(root, "splash", "kernel", "inspections", "splash-auto.json") 84 | rst2inspections(rst_filename, out_filename) 85 | 86 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 3.5.0 3 | commit = True 4 | tag = True 5 | tag_name = {new_version} 6 | 7 | [bumpversion:file:splash/__init__.py] 8 | serialize = 9 | {major}.{minor}.{patch} 10 | {major}.{minor} 11 | 12 | [bumpversion:file:docs/conf.py] 13 | serialize = 14 | {major}.{minor} 15 | 16 | [tool:pytest] 17 | norecursedirs = bin debian dist docs examples notebooks splash/kernel 18 | 
# Read the long description relative to setup.py rather than the current
# working directory, and close the file handle deterministically.
with open(_path('README.rst'), 'rb') as _readme_fp:
    _long_description = _readme_fp.read().decode('utf8')

# Keyword arguments passed to setuptools.setup().
setup_args = {
    'name': 'splash',
    'version': get_version(),
    'url': 'https://github.com/scrapinghub/splash',
    'description': 'A javascript rendered with a HTTP API',
    'long_description': _long_description,
    'author': 'Scrapinghub',
    'author_email': 'info@scrapinghub.com',
    'maintainer': 'Scrapinghub',
    'maintainer_email': 'info@scrapinghub.com',
    'license': 'BSD',
    'scripts': ['bin/splash'],
    'packages': ['splash', 'splash.har', 'splash.kernel',
                 'splash.engines.chromium', 'splash.engines.webkit'],
    # Non-Python files shipped inside the ``splash`` package.
    'package_data': {'splash': [
        'vendor/harviewer/webapp/css/*.css',
        'vendor/harviewer/webapp/css/images/*.*',
        'vendor/harviewer/webapp/css/images/menu/*.*',
        'vendor/harviewer/webapp/scripts/*.*',
        'vendor/harviewer/webapp/scripts/core/*.*',
        'vendor/harviewer/webapp/scripts/domplate/*.*',
        'vendor/harviewer/webapp/scripts/downloadify/js/*.*',
        'vendor/harviewer/webapp/scripts/downloadify/src/*.*',
        'vendor/harviewer/webapp/scripts/downloadify/media/*.*',
        'vendor/harviewer/webapp/scripts/excanvas/*.*',
        'vendor/harviewer/webapp/scripts/jquery-plugins/*.*',
        'vendor/harviewer/webapp/scripts/json-query/*.*',
        'vendor/harviewer/webapp/scripts/nls/*.*',
        'vendor/harviewer/webapp/scripts/preview/*.*',
        'vendor/harviewer/webapp/scripts/syntax-highlighter/*.js',
        'vendor/harviewer/webapp/scripts/tabs/*.*',
        'vendor/harviewer/webapp/har.js',
        'ui/*.*',

        'examples/*.lua',
        'lua_modules/*.lua',
        'lua_modules/libs/*.lua',
        'lua_modules/vendor/*.lua',
        'kernel/inspections/*.json',
        'kernel/kernels/splash-py2/*.*',
        'kernel/kernels/splash-py3/*.*',
    ]},
    'zip_safe': False,
    'install_requires': [
        'Twisted[http2] >= 19.7.0',
        'qt5reactor',
        'psutil',
        'adblockparser',
        'xvfbwrapper',
        'funcparserlib',
        'Pillow >= 3.4.2',
        'attrs >= 18.2.0',
    ],
    'classifiers': [
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'License :: OSI Approved :: BSD License',
        'Operating System :: OS Independent',
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Environment :: No Input/Output (Daemon)',
        'Topic :: Internet :: WWW/HTTP',
    ],
}
class ArgumentCache(object):
    """
    Content-addressed store with optional least-recently-used eviction.

    Values are stored under the SHA-1 hex digest of their JSON
    representation; reading or re-adding a value marks it as the most
    recently used one.

    >>> cache = ArgumentCache()
    >>> "foo" in cache
    False
    >>> cache['foo']
    Traceback (most recent call last):
    ...
    KeyError: 'foo'
    >>> len(cache)
    0
    >>> key = cache.add("Hello, world!")
    >>> key
    'bea2c9d7fd040292e0424938af39f7d6334e8d8a'
    >>> cache[key]
    'Hello, world!'
    >>> key in cache
    True
    >>> len(cache)
    1
    >>> cache.get_missing([
    ...     ('bar', key),
    ...     ('baz', '1111111111111111111111111111111111111111'),
    ... ])
    ['baz']
    >>> cache.add_many(['value1', 'value2'])
    ['daf626c4ebd6bdd697e043111454304e5fb1459e', '849988af22dbd04d3e353caf77f9d81241ca9ee2']
    >>> cache['daf626c4ebd6bdd697e043111454304e5fb1459e']
    'value1'
    >>> cache['849988af22dbd04d3e353caf77f9d81241ca9ee2']
    'value2'
    >>> cache[key]
    'Hello, world!'
    >>> len(cache)
    3
    >>> cache.clear()
    >>> len(cache)
    0

    Size of ArgumentCache can be limited:

    >>> cache = ArgumentCache(0)
    Traceback (most recent call last):
    ...
    ValueError: maxsize must be greater than 0
    >>> cache = ArgumentCache(2)  # limit it to 2 elements
    >>> cache.add_many(['value1', 'value2'])
    ['daf626c4ebd6bdd697e043111454304e5fb1459e', '849988af22dbd04d3e353caf77f9d81241ca9ee2']
    >>> len(cache)
    2
    >>> cache.add("Hello, world!")
    'bea2c9d7fd040292e0424938af39f7d6334e8d8a'
    >>> len(cache)
    2
    >>> cache["bea2c9d7fd040292e0424938af39f7d6334e8d8a"]
    'Hello, world!'
    >>> cache['849988af22dbd04d3e353caf77f9d81241ca9ee2']
    'value2'
    >>> cache['daf626c4ebd6bdd697e043111454304e5fb1459e']
    Traceback (most recent call last):
    ...
    KeyError: 'daf626c4ebd6bdd697e043111454304e5fb1459e'
    >>> cache.add("foo")
    'd465e627f9946f2fa0d2dc0fc04e5385bc6cd46d'
    >>> len(cache)
    2
    >>> 'bea2c9d7fd040292e0424938af39f7d6334e8d8a' in cache
    False
    """
    def __init__(self, maxsize=None):
        # No limit is modelled as an infinite capacity.
        limit = float("+inf") if maxsize is None else maxsize
        if limit <= 0:
            raise ValueError("maxsize must be greater than 0")
        self.maxsize = limit
        self._values = OrderedDict()

    def add(self, value):
        """ Store ``value`` as the most recent entry and return its key. """
        digest = self.get_key(value)
        store = self._values
        if digest not in store:
            # Make room by dropping the least recently used entries.
            while len(store) >= self.maxsize:
                store.popitem(last=False)
        else:
            # Remove first so that re-insertion lands at the "recent" end.
            del store[digest]
        store[digest] = value
        return digest

    def __getitem__(self, key):
        # A missing key raises KeyError before any reordering happens.
        value = self._values[key]
        self._values.move_to_end(key)
        return value

    def __contains__(self, key):
        return key in self._values

    def __len__(self):
        return len(self._values)

    def clear(self):
        """ Drop all cached values. """
        self._values.clear()

    def get_missing(self, items):
        """
        Given ``(name, key)`` pairs, return names whose key is not cached.
        """
        absent = []
        for name, key in items:
            if key not in self:
                absent.append(name)
        return absent

    def add_many(self, values):
        """
        Add every element of ``values`` to the cache, in order.
        Return the list of their keys.
        """
        return list(map(self.add, values))

    @classmethod
    def get_key(cls, value):
        """ Return the SHA-1 hex digest of the JSON form of ``value``. """
        serialized = json.dumps(value, sort_keys=True, ensure_ascii=False)
        hasher = hashlib.sha1(serialized.encode('utf8'))
        return hasher.hexdigest()
def delete(self, name=None, url=None):
    """
    Delete cookies selected by ``name`` and/or ``url``; return how many
    cookies were removed.

    * ``url`` given: only cookies the jar would send for that URL are
      candidates; if ``name`` is given too, only candidates with that
      name are removed.
    * ``url`` omitted: every cookie whose name equals ``name`` is removed.
    """
    before = self.allCookies()

    def cookie_name(cookie):
        # Cookie names are Qt byte arrays; compare them as text.
        return to_unicode(bytes(cookie.name()))

    if url is not None:
        candidates = self.cookiesForUrl(to_qurl(url))
        if name is not None:
            candidates = [c for c in candidates if cookie_name(c) == name]
        # Match cookies by their full raw form.
        doomed = {self._cookie_fp(c) for c in candidates}
        kept = [c for c in before if self._cookie_fp(c) not in doomed]
    else:
        kept = [c for c in before if cookie_name(c) != name]

    self.setAllCookies(kept)
    return len(before) - len(kept)
75 | """ 76 | cookies = list(self.allCookies()) 77 | cookies.append(self.har_cookie2qt(cookie)) 78 | self.setAllCookies(cookies) 79 | 80 | @classmethod 81 | def har_cookie2qt(cls, cookie): 82 | qcookie = QNetworkCookie() 83 | qcookie.setName(to_bytes(cookie["name"])) 84 | qcookie.setValue(to_bytes(cookie["value"])) 85 | 86 | if 'domain' in cookie: 87 | qcookie.setDomain(cookie["domain"]) 88 | 89 | if 'httpOnly' in cookie: 90 | qcookie.setHttpOnly(cookie["httpOnly"]) 91 | 92 | if 'secure' in cookie: 93 | qcookie.setSecure(cookie["secure"]) 94 | 95 | if 'path' in cookie: 96 | qcookie.setPath(cookie["path"]) 97 | 98 | if cookie.get('expires'): 99 | expires = QDateTime.fromString(cookie["expires"], Qt.ISODate) 100 | qcookie.setExpirationDate(expires) 101 | 102 | return qcookie 103 | 104 | 105 | def _should_send_cookies(request): 106 | """ Return True if cookies should be sent for a request """ 107 | # based on QNetworkAccessManager::createRequest() C++ code 108 | attr = request.attribute( 109 | QNetworkRequest.CookieLoadControlAttribute, 110 | QNetworkRequest.Automatic 111 | ) 112 | return attr == QNetworkRequest.Automatic 113 | 114 | 115 | def _should_save_cookies(request): 116 | """ Return True if cookies should be saved for a request """ 117 | # based on QNetworkReplyImplPrivate::metaDataChanged() C++ code 118 | attr = request.attribute( 119 | QNetworkRequest.CookieSaveControlAttribute, 120 | QNetworkRequest.Automatic 121 | ) 122 | return attr == QNetworkRequest.Automatic 123 | 124 | 125 | def _cookies_to_raw(cookies): 126 | """ Build raw Cookie: header value from a list of QNetworkCookie instances """ 127 | # based on QNetworkRequest::fromheaderValue() C++ code 128 | return b"; ".join( 129 | bytes(cookie.toRawForm(QNetworkCookie.NameAndValueOnly)) 130 | for cookie in cookies 131 | ) 132 | -------------------------------------------------------------------------------- /splash/defaults.py: 
-------------------------------------------------------------------------------- 1 | # browser engines 2 | BROWSER_ENGINES_ENABLED = "webkit,chromium" 3 | 4 | # timeouts 5 | TIMEOUT = 30 6 | WAIT_TIME = 0.0 7 | RESOURCE_TIMEOUT = 0.0 8 | 9 | MAX_TIMEOUT = 90.0 10 | 11 | # Default size of browser window. As there're no decorations, this affects 12 | # both "window.inner*" and "window.outer*" values. 13 | VIEWPORT_SIZE = '1024x768' 14 | 15 | # Window size limitations. 16 | VIEWPORT_MAX_WIDTH = 40000 17 | VIEWPORT_MAX_HEIGTH = 40000 18 | VIEWPORT_MAX_AREA = 1280 * VIEWPORT_MAX_HEIGTH # support max height @ 1280 width 19 | 20 | MAX_WIDTH = 1920 21 | MAX_HEIGTH = 1080 22 | 23 | AUTOLOAD_IMAGES = 1 24 | 25 | # If 'raster', PNG images will be rescaled after rendering as regular images. 26 | # If 'vector', PNG image will be rescaled during rendering which is faster and 27 | # crisper, but may cause rendering artifacts. 28 | IMAGE_SCALE_METHOD = 'raster' 29 | 30 | # This value has the same meaning as "level" kwarg of :func:`zlib.compress`: 31 | # - 0 means no compression at all 32 | # - 1 means best speed, lowest compression ratio 33 | # - 9 means best compression, lowest speed 34 | # 35 | # The default is 1, because it is twice as fast as 9 and produces only 15% 36 | # larger files. 37 | PNG_COMPRESSION_LEVEL = 1 38 | 39 | # 75 is Pillow default. Values above 95 should be avoided; 40 | # 100 disables portions of the JPEG compression algorithm, 41 | # and results in large files with hardly any gain in image quality. 42 | JPEG_QUALITY = 75 43 | 44 | # There's a bug in Qt that manifests itself when width or height of rendering 45 | # surface (aka the png image) is more than 32768. Usually, this is solved by 46 | # rendering the image in tiled manner and obviously, TILE_MAXSIZE must not 47 | # exceed that value. 
48 | # 49 | # Other than that, the setting is a tradeoff between performance and memory 50 | # usage, because QImage that acts as a rendering surface is quite a resource 51 | # hog. So, if you increase tile size you may end up using a lot more memory, 52 | # but there is less image pasting and the rendering is faster. As of now, 2048 53 | # size is chosen to fit commonly used 1080p resolution in one tile. 54 | TILE_MAXSIZE = 2048 55 | 56 | # defaults for render.json endpoint 57 | DO_HTML = 0 58 | DO_IFRAMES = 0 59 | DO_PNG = 0 60 | DO_JPEG = 0 61 | SHOW_SCRIPT = 0 62 | SHOW_CONSOLE = 0 63 | SHOW_HISTORY = 0 64 | SHOW_HAR = 0 65 | 66 | # servers 67 | SPLASH_PORT = 8050 68 | SPLASH_IP = '0.0.0.0' 69 | 70 | # pool options 71 | SLOTS = 20 72 | 73 | # argument cache option 74 | ARGUMENT_CACHE_MAX_ENTRIES = 500 75 | 76 | # security options 77 | ALLOWED_SCHEMES = ['http', 'https', 'data', 'ftp', 'sftp', 'ws', 'wss'] 78 | JS_CROSS_DOMAIN_ENABLED = False 79 | PRIVATE_MODE = True 80 | 81 | # logging 82 | VERBOSITY = 1 83 | 84 | # plugins (e.g. 
# Human-readable description for each QWebEnginePage render process
# termination status.
RenderProcessTerminationStatus = {
    QWebEnginePage.NormalTerminationStatus: "The render process terminated normally.",
    QWebEnginePage.AbnormalTerminationStatus: "The render process terminated with a non-zero exit status.",
    QWebEnginePage.CrashedTerminationStatus: "The render process crashed, for example because of a segmentation fault.",
    QWebEnginePage.KilledTerminationStatus: "The render process was killed, for example by SIGKILL or task manager kill.",
}
class ChromiumDefaultRenderScript(ChromiumRenderScript, BaseFixedRenderScript):
    """
    Fixed render script for the Chromium engine: load a URL, then hand the
    value of :meth:`get_result` to ``return_result``.

    Most render options are not implemented for Chromium; asking for any
    of them raises :class:`BadOption`.
    """

    def start(self, url, baseurl=None, wait=None, viewport=None,
              js_source=None, js_profile=None, images=None, console=False,
              headers=None, http_method='GET', body=None,
              render_all=False, resource_timeout=None, request_body=False,
              response_body=False, html5_media=False, http2=True):
        """
        Validate the options, set the viewport (unless it is ``'full'``)
        and start loading ``url``.

        Raises BadOption for every option that is not implemented.
        """
        self.url = url
        self.wait_time = wait if wait is not None else defaults.WAIT_TIME
        self.viewport = (viewport if viewport is not None
                         else defaults.VIEWPORT_SIZE)
        self.render_all = render_all or viewport == 'full'

        def unsupported():
            # Lazily yields the message of each unsupported option, in
            # check order; only the first one is ever consumed, so later
            # conditions are not evaluated once a check fails.
            if baseurl is not None:
                yield "baseurl is not implemented"
            if js_source is not None:
                yield "js_source is not implemented"
            if js_profile is not None:
                yield "js_profile is not implemented"
            if images is False:
                yield "images is not implemented"
            if console is True:
                yield "console is not implemented"
            if headers is not None:
                yield "headers is not implemented"
            if http_method != 'GET':
                yield "http_method is not implemented"
            if body is not None:
                yield "body is not implemented"
            if resource_timeout is not None and resource_timeout > 0:
                yield "resource_timeout is not implemented"
            if request_body is True:
                yield "request_body is not implemented"
            if response_body is True:
                yield "response_body is not implemented"
            if html5_media is True:
                yield "html5_media is not implemented"
            if render_all is True:
                yield "render_all is not implemented"

        # FIXME: BadOption errors are logged as unhandled errors
        for message in unsupported():
            raise BadOption(message)

        # A 'full' viewport is applied only after the page has loaded,
        # see _load_finished_ok.
        if self.viewport != 'full':
            self.tab.set_viewport(self.viewport)

        if not http2:
            raise BadOption("Disabling of http2 is not implemented "
                            "for Chromium")

        self.tab.go(
            url=url,
            callback=self.on_goto_load_finished,
            errback=self.on_goto_load_error,
        )

    @stop_on_error
    def _load_finished_ok(self):
        super()._load_finished_ok()

        if self.viewport == 'full':
            self.tab.set_viewport(self.viewport)

        # get_result may return a plain value or a Deferred; any failure,
        # including one raised while returning the result, is reported as
        # an internal error.
        (defer.maybeDeferred(self.get_result)
              .addCallback(self.return_result)
              .addErrback(self._return_internal_error))

    def _return_internal_error(self, failure: Failure):
        """ Report ``failure`` to the client as an InternalError. """
        error = InternalError(str(failure.value))
        self.return_error(error)

    @abc.abstractmethod
    def get_result(self):
        """ Produce the render result; implemented by subclasses. """
        return None
def get_web_viewport_size(self) -> QSize:
    """ Return the size of the view widget attached to the web page. """
    view = self.web_page.view()
    return view.size()
class ChromiumWebPage(QWebEnginePage):
    """
    QWebEnginePage that answers JavaScript dialogs (alert / confirm /
    prompt) programmatically, logging them when ``verbosity`` is above 1.
    """
    def __init__(self, profile: QWebEngineProfile, verbosity: int = 0) -> None:
        # Zero-argument super() starts the MRO lookup at this class;
        # naming QWebEnginePage here would start the lookup *after* it.
        super().__init__(profile, None)
        self.verbosity = verbosity
        # Re-parent the profile to this page (presumably so that it is
        # destroyed together with the page - confirm against callers).
        profile.setParent(self)

    def javaScriptAlert(self, url, msg):
        """ Handle ``alert()``: log it (if verbose) and return. """
        # TODO: callback
        if self.verbosity > 1:
            log.msg("javaScriptAlert, url=%r, msg=%r" % (url, msg))
        return

    def javaScriptConfirm(self, url, msg):
        """ Handle ``confirm()``: always answer False. """
        if self.verbosity > 1:
            log.msg("javaScriptConfirm, url=%r, msg=%r" % (url, msg))
        return False

    def javaScriptPrompt(self, url, msg, default=None):
        """ Handle ``prompt()``: always answer ``(False, '')``. """
        if self.verbosity > 1:
            log.msg("javaScriptPrompt, url=%r, msg=%r, default=%r" % (
                url, msg, default))
        return False, ''
class ExpiredArguments(Exception): 43 | """ Arguments stored with ``save_args`` are expired """ 44 | pass 45 | 46 | 47 | class ScriptError(BadOption): 48 | """ Error happened while executing Lua script """ 49 | LUA_INIT_ERROR = 'LUA_INIT_ERROR' # error happened before coroutine starts 50 | LUA_ERROR = 'LUA_ERROR' # lua error() is called from the coroutine 51 | LUA_CONVERT_ERROR = 'LUA_CONVERT_ERROR' # result can't be converted to Python 52 | SPLASH_LUA_ERROR = 'SPLASH_LUA_ERROR' # custom error raised by Splash 53 | BAD_MAIN_ERROR = 'BAD_MAIN_ERROR' # main() definition is incorrect 54 | MAIN_NOT_FOUND_ERROR = 'MAIN_NOT_FOUND_ERROR' # main() is not found 55 | SYNTAX_ERROR = 'SYNTAX_ERROR' # XXX: unused; reported as INIT_ERROR now 56 | JS_ERROR = 'JS_ERROR' # error in a wrapped JS function 57 | UNKNOWN_ERROR = 'UNKNOWN_ERROR' 58 | 59 | 60 | class JsError(Exception): 61 | """ Error occurred in JavaScript code """ 62 | pass 63 | 64 | 65 | class DOMError(Exception): 66 | """ Error occurred during DOM operations""" 67 | NOT_IN_DOM_ERROR = 'NOT_IN_DOM_ERROR' 68 | NOT_COMPATIBLE_NODE_ERROR = 'NOT_COMPATIBLE_NODE_ERROR' 69 | 70 | 71 | -------------------------------------------------------------------------------- /splash/examples/block-css.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | splash:on_response_headers(function(response) 3 | local ct = response.headers["Content-Type"] 4 | if ct == "text/css" then 5 | response.abort() 6 | end 7 | end) 8 | 9 | assert(splash:go(args.url)) 10 | return { 11 | png=splash:png(), 12 | har=splash:har() 13 | } 14 | end -------------------------------------------------------------------------------- /splash/examples/call-later.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | local snapshots = {} 3 | local timer = splash:call_later(function() 4 | snapshots["a"] = splash:html() 5 | splash:wait(1.0) 
6 | snapshots["b"] = splash:html() 7 | end, 1.5) 8 | assert(splash:go(args.url)) 9 | splash:wait(3.0) 10 | timer:reraise() 11 | 12 | return snapshots 13 | end -------------------------------------------------------------------------------- /splash/examples/count-divs.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | local get_div_count = splash:jsfunc([[ 3 | function () { 4 | var body = document.body; 5 | var divs = body.getElementsByTagName('div'); 6 | return divs.length; 7 | } 8 | ]]) 9 | splash:go(args.url) 10 | 11 | return ("There are %s DIVs in %s"):format( 12 | get_div_count(), args.url) 13 | end -------------------------------------------------------------------------------- /splash/examples/disable-images.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | splash.images_enabled = false 3 | assert(splash:go(splash.args.url)) 4 | return {png=splash:png()} 5 | end -------------------------------------------------------------------------------- /splash/examples/element-screenshot.lua: -------------------------------------------------------------------------------- 1 | -- This is an example of how to use lower-level 2 | -- Splash functions to get an element screenshot. 3 | -- 4 | -- In practice use splash:select("a"):png{pad=32}. 
5 | 6 | 7 | -- this function adds padding around region 8 | function pad(r, pad) 9 | return {r[1]-pad, r[2]-pad, r[3]+pad, r[4]+pad} 10 | end 11 | 12 | function main(splash, args) 13 | -- this function returns element bounding box 14 | local get_bbox = splash:jsfunc([[ 15 | function(css) { 16 | var el = document.querySelector(css); 17 | var r = el.getBoundingClientRect(); 18 | return [r.left, r.top, r.right, r.bottom]; 19 | } 20 | ]]) 21 | 22 | -- main script 23 | assert(splash:go(splash.args.url)) 24 | assert(splash:wait(0.5)) 25 | 26 | -- don't crop image by a viewport 27 | splash:set_viewport_full() 28 | 29 | -- let's get a screenshot of a first 30 | -- element on a page, with extra 32px around it 31 | local region = pad(get_bbox("a"), 32) 32 | return splash:png{region=region} 33 | end -------------------------------------------------------------------------------- /splash/examples/har.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | assert(splash:go(args.url)) 3 | assert(splash:wait(0.5)) 4 | return splash:har() 5 | end -------------------------------------------------------------------------------- /splash/examples/log-locked-requests.lua: -------------------------------------------------------------------------------- 1 | treat = require("treat") 2 | 3 | function main(splash, args) 4 | local urls = {} 5 | splash:on_navigation_locked(function(request) 6 | table.insert(urls, request.url) 7 | end) 8 | 9 | assert(splash:go(splash.args.url)) 10 | splash:lock_navigation() 11 | splash:select("a"):mouse_click() 12 | return treat.as_array(urls) 13 | end -------------------------------------------------------------------------------- /splash/examples/log-requests.lua: -------------------------------------------------------------------------------- 1 | treat = require("treat") 2 | 3 | function main(splash, args) 4 | local urls = {} 5 | splash:on_request(function(request) 6 | table.insert(urls, 
request.url) 7 | end) 8 | 9 | assert(splash:go(splash.args.url)) 10 | return treat.as_array(urls) 11 | end -------------------------------------------------------------------------------- /splash/examples/multiple-pages.lua: -------------------------------------------------------------------------------- 1 | treat = require("treat") 2 | 3 | -- Given a URL, this function returns a table 4 | -- with the page screenshot, its HTML contents 5 | -- and its title. 6 | function page_info(splash, url) 7 | local ok, msg = splash:go(url) 8 | if not ok then 9 | return {ok=false, reason=msg} 10 | end 11 | local res = { 12 | html=splash:html(), 13 | title=splash:evaljs('document.title'), 14 | image=splash:png(), 15 | ok=true, 16 | } 17 | return res 18 | end 19 | 20 | function main(splash, args) 21 | -- visit first 3 pages of hacker news 22 | local base = "https://news.ycombinator.com/news?p=" 23 | local result = treat.as_array({}) 24 | for i=1,3 do 25 | local url = base .. i 26 | result[i] = page_info(splash, url) 27 | end 28 | return result 29 | end -------------------------------------------------------------------------------- /splash/examples/preload-functions.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | splash:autoload([[ 3 | function get_document_title(){ 4 | return document.title; 5 | } 6 | ]]) 7 | assert(splash:go(args.url)) 8 | 9 | return splash:evaljs("get_document_title()") 10 | end -------------------------------------------------------------------------------- /splash/examples/preload-jquery.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | assert(splash:autoload("https://code.jquery.com/jquery-2.1.3.min.js")) 3 | assert(splash:go(splash.args.url)) 4 | local version = splash:evaljs("$.fn.jquery") 5 | 6 | return 'JQuery version: ' .. 
version 7 | end -------------------------------------------------------------------------------- /splash/examples/render-multiple.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | splash:set_viewport_size(800, 600) 3 | splash:set_user_agent('Splash bot') 4 | local example_urls = {"www.google.com", "www.bbc.co.uk", "scrapinghub.com"} 5 | local urls = args.urls or example_urls 6 | local results = {} 7 | for _, url in ipairs(urls) do 8 | local ok, reason = splash:go("http://" .. url) 9 | if ok then 10 | splash:wait(0.2) 11 | results[url] = splash:png() 12 | end 13 | end 14 | return results 15 | end 16 | -------------------------------------------------------------------------------- /splash/examples/render-png.lua: -------------------------------------------------------------------------------- 1 | -- A simplistic implementation of render.png 2 | -- endpoint. 3 | function main(splash, args) 4 | assert(splash:go(args.url)) 5 | 6 | return splash:png{ 7 | width=args.width, 8 | height=args.height 9 | } 10 | end -------------------------------------------------------------------------------- /splash/examples/return-title.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | splash:go("http://example.com") 3 | splash:wait(0.5) 4 | local title = splash:evaljs("document.title") 5 | return {title=title} 6 | end -------------------------------------------------------------------------------- /splash/examples/run-js.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | assert(splash:go("https://news.ycombinator.com/")) 3 | splash:runjs([[ 4 | document.querySelector('table') 5 | .style.backgroundColor = "#fff"; 6 | ]]) 7 | return {png=splash:png()} 8 | end -------------------------------------------------------------------------------- /splash/examples/scroll.lua: 
-------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | splash:go(args.url) 3 | local scroll_to = splash:jsfunc("window.scrollTo") 4 | scroll_to(0, 300) 5 | return {png=splash:png()} 6 | end -------------------------------------------------------------------------------- /splash/examples/splash-haproxy.conf: -------------------------------------------------------------------------------- 1 | # Example HAProxy 1.5 config for Splash. 2 | global 3 | # raise it if necessary 4 | maxconn 256 5 | # required for stats page 6 | stats socket /tmp/haproxy 7 | 8 | defaults 9 | log global 10 | mode http 11 | 12 | # remove requests from a queue when clients disconnect; 13 | # see https://cbonte.github.io/haproxy-dconv/configuration-1.5.html#4.2-option%20abortonclose 14 | option abortonclose 15 | 16 | # increase these values if you want to 17 | # allow long request queues in HAProxy 18 | timeout connect 10m 19 | timeout client 10m 20 | timeout server 10m 21 | 22 | # visit 0.0.0.0:8036 to see HAProxy stats page 23 | listen stats :8036 24 | mode http 25 | stats enable 26 | stats hide-version 27 | stats uri / 28 | stats refresh 5s 29 | stats realm Haproxy\ Statistics 30 | # enable auth if necessary 31 | # stats auth admin1:AdMiN123 32 | # stats auth admin2:AdMiN321 33 | 34 | # Splash Cluster configuration 35 | frontend http-in 36 | bind *:8050 37 | default_backend splash-cluster 38 | 39 | backend splash-cluster 40 | option httpchk GET / 41 | balance leastconn 42 | 43 | # Add/remove servers below: 44 | server splash1 127.0.0.1:8060 check maxconn 5 fall 15 45 | server splash2 127.0.0.1:8070 check maxconn 5 fall 15 46 | server splash-docker1 192.168.59.103:8060 check maxconn 5 fall 15 47 | server splash-docker2 192.168.59.103:8070 check maxconn 5 fall 15 48 | -------------------------------------------------------------------------------- /splash/examples/submit-search.lua: 
-------------------------------------------------------------------------------- 1 | function find_search_input(inputs) 2 | if #inputs == 1 then 3 | return inputs[1] 4 | else 5 | for _, input in ipairs(inputs) do 6 | if input.node.attributes.type == "search" then 7 | return input 8 | end 9 | end 10 | end 11 | end 12 | -- Returns form, input; both are nil when no form has a visible input. 13 | function find_input(forms) 14 | local potential = {} 15 | 16 | for _, form in ipairs(forms) do 17 | local inputs = form.node:querySelectorAll('input:not([type="hidden"])') 18 | if #inputs ~= 0 then 19 | local input = find_search_input(inputs) 20 | if input then 21 | return form, input 22 | end 23 | 24 | potential[#potential + 1] = {input=inputs[1], form=form} 25 | end 26 | end 27 | local fallback = potential[1] or {} 28 | return fallback.form, fallback.input 29 | end 30 | 31 | function main(splash, args) 32 | -- find a form and submit "splash" to it 33 | local function search_for_splash() 34 | local forms = splash:select_all('form') 35 | 36 | if #forms == 0 then 37 | error('no search form is found') 38 | end 39 | 40 | local form, input = find_input(forms) 41 | 42 | if not input then 43 | error('no search form is found') 44 | end 45 | 46 | assert(input:send_keys('splash')) 47 | assert(splash:wait(0)) 48 | assert(form:submit()) 49 | end 50 | 51 | -- main rendering script 52 | assert(splash:go(args.url)) 53 | assert(splash:wait(1)) 54 | search_for_splash() 55 | assert(splash:wait(3)) 56 | 57 | return splash:png() 58 | end -------------------------------------------------------------------------------- /splash/examples/wait-for-element.lua: -------------------------------------------------------------------------------- 1 | function wait_for_element(splash, css, maxwait) 2 | -- Wait until a selector matches an element 3 | -- in the page. Return an error if waited more 4 | -- than maxwait seconds. 
5 | if maxwait == nil then 6 | maxwait = 10 7 | end 8 | return splash:wait_for_resume(string.format([[ 9 | function main(splash) { 10 | var selector = '%s'; 11 | var maxwait = %s; 12 | var end = Date.now() + maxwait*1000; 13 | 14 | function check() { 15 | if(document.querySelector(selector)) { 16 | splash.resume('Element found'); 17 | } else if(Date.now() >= end) { 18 | var err = 'Timeout waiting for element'; 19 | splash.error(err + " " + selector); 20 | } else { 21 | setTimeout(check, 200); 22 | } 23 | } 24 | check(); 25 | } 26 | ]], css, maxwait)) 27 | end 28 | 29 | function main(splash, args) 30 | splash:go("http://scrapinghub.com") 31 | wait_for_element(splash, "#foo") 32 | return {png=splash:png()} 33 | end -------------------------------------------------------------------------------- /splash/examples/with-timeout.lua: -------------------------------------------------------------------------------- 1 | function main(splash, args) 2 | local ok, result = splash:with_timeout(function() 3 | -- try commenting out splash:wait(3) 4 | splash:wait(3) 5 | assert(splash:go(args.url)) 6 | end, 2) 7 | 8 | if not ok then 9 | if result == "timeout_over" then 10 | return "Cannot navigate to the url within 2 seconds" 11 | else 12 | return result 13 | end 14 | end 15 | return "Navigated to the url within 2 seconds" 16 | end -------------------------------------------------------------------------------- /splash/har/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from .utils import get_duration, format_datetime 3 | -------------------------------------------------------------------------------- /splash/har/log.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import namedtuple 3 | from datetime import datetime 4 | 5 | import splash 6 | from PyQt5.QtCore import PYQT_VERSION_STR, QT_VERSION_STR 7 | from PyQt5.QtWebKit 
import qWebKitVersion 8 | 9 | from .utils import get_duration, format_datetime, cleaned_har_entry 10 | 11 | 12 | HarEvent = namedtuple('HarEvent', 'type data') 13 | 14 | HAR_ENTRY = 'entry' 15 | HAR_TIMING = 'timing' 16 | HAR_URL_CHANGED = 'urlChanged' 17 | HAR_TITLE_CHANGED = 'titleChanged' 18 | 19 | 20 | class HarLog(object): 21 | """ 22 | Helper class for building HAR data. 23 | """ 24 | 25 | def __init__(self): 26 | self.created_at = datetime.utcnow() 27 | self.network_entries_map = {} # key => network entry 28 | self.events = [] # all entries in order, including the events 29 | self.pages = None 30 | 31 | def get_mutable_entry(self, req_id, create=False): 32 | """ 33 | Return a dict with HAR entry data. The dict is not a copy; 34 | caller can modify the result and the changes will be kept. 35 | """ 36 | if create: 37 | assert req_id not in self.network_entries_map 38 | entry = {"_idx": req_id} 39 | self.network_entries_map[req_id] = entry 40 | self.events.append(HarEvent(HAR_ENTRY, entry)) 41 | return self.network_entries_map[req_id] 42 | 43 | def has_entry(self, req_id): 44 | """ Return True if entry exists for this request """ 45 | return req_id in self.network_entries_map 46 | 47 | def store_url(self, url): 48 | """ Call this method when URL is changed. """ 49 | self.events.append(HarEvent(HAR_URL_CHANGED, str(url))) 50 | 51 | def store_title(self, title): 52 | """ Call this method when page title is changed. """ 53 | self.events.append(HarEvent(HAR_TITLE_CHANGED, str(title))) 54 | 55 | def store_timing(self, name): 56 | """ 57 | Call this method when an event you want to store timing for happened. 58 | """ 59 | self.events.append( 60 | HarEvent(HAR_TIMING, {"name": name, "time": datetime.utcnow()}) 61 | ) 62 | 63 | def todict(self): 64 | """ Return HAR log as a Python dict. 
""" 65 | 66 | # import pprint 67 | # pprint.pprint(self.events) 68 | 69 | self._fill_pages() 70 | 71 | return { 72 | "log": { 73 | "version": "1.2", 74 | "creator": { 75 | "name": "Splash", 76 | "version": splash.__version__, 77 | }, 78 | "browser": self._get_browser(), 79 | "entries": self._get_har_entries(), 80 | "pages": self.pages, 81 | } 82 | } 83 | 84 | def _get_browser(self): 85 | return { 86 | "name": "QWebKit", 87 | "version": str(qWebKitVersion()), 88 | "comment": "PyQt %s, Qt %s" % (PYQT_VERSION_STR, QT_VERSION_STR), 89 | } 90 | 91 | def _empty_page(self, page_id, started_dt): 92 | if not isinstance(started_dt, str): 93 | started_dt = format_datetime(started_dt) 94 | 95 | return { 96 | "id": str(page_id), 97 | "title": "[no title]", 98 | "startedDateTime": started_dt, 99 | "pageTimings": { 100 | "onContentLoad": -1, 101 | "onLoad": -1, 102 | } 103 | } 104 | 105 | def _fill_pages(self): 106 | page_id = 1 107 | started_dt = self.created_at 108 | current_page = self._empty_page(page_id, started_dt) 109 | first_page = True 110 | 111 | self.pages = [current_page] 112 | 113 | for idx, ev in enumerate(self.events): 114 | if ev.type == HAR_TIMING: 115 | name = ev.data["name"] 116 | time = get_duration(started_dt, ev.data["time"]) 117 | current_page["pageTimings"][name] = time 118 | 119 | elif ev.type == HAR_TITLE_CHANGED: 120 | current_page["title"] = ev.data 121 | 122 | elif ev.type == HAR_ENTRY: 123 | ev.data["pageref"] = str(page_id) 124 | 125 | elif ev.type == HAR_URL_CHANGED: 126 | # We need to find a network entry which caused URL 127 | # to change - it belongs to this new page. 128 | cause_ev = self._prev_entry(ev.data, idx) 129 | if first_page: 130 | first_page = False 131 | else: 132 | # Start a new page. 133 | page_id += 1 134 | if cause_ev is None: 135 | # XXX: is it a right thing to do? 
136 | started_dt = self.created_at 137 | else: 138 | # FIXME: this requires non-standard _tmp data 139 | started_dt = cause_ev.data['_tmp']['start_time'] 140 | current_page = self._empty_page(page_id, started_dt) 141 | self.pages.append(current_page) 142 | 143 | if cause_ev is not None: 144 | cause_ev.data["pageref"] = str(page_id) 145 | 146 | def _prev_entry(self, url, last_idx): 147 | for ev in reversed(self.events[:last_idx]): 148 | if ev.type != HAR_ENTRY: 149 | continue 150 | if ev.data["request"]["url"] == url: 151 | return ev 152 | 153 | def _get_har_entries(self): 154 | return [ 155 | cleaned_har_entry(e.data) 156 | for e in self.events 157 | if e.type == HAR_ENTRY 158 | ] 159 | -------------------------------------------------------------------------------- /splash/har/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import base64 3 | from operator import itemgetter 4 | import itertools 5 | from datetime import datetime 6 | 7 | 8 | def format_datetime(dt): 9 | """ Format datetime.datetime object to make HAR validator happy """ 10 | return dt.isoformat() + 'Z' 11 | 12 | 13 | def get_duration(start, end=None): 14 | """ Return duration between `start` and `end` datetimes in HAR format """ 15 | if end is None: 16 | end = datetime.utcnow() 17 | elapsed = (end - start).total_seconds() 18 | return int(elapsed * 1000) # ms 19 | 20 | 21 | def cleaned_har_entry(dct): 22 | return {k: v for (k, v) in dct.items() if k not in {'_tmp', '_idx'}} 23 | 24 | 25 | def entries2pages(entries): 26 | """ Group HAR entries into pages by pageref """ 27 | pages = [] 28 | for pageref, group in itertools.groupby(entries, key=itemgetter("pageref")): 29 | pages.append(list(group)) 30 | return pages 31 | 32 | 33 | def get_response_body_bytes(har_response): 34 | """ Return binary response data """ 35 | content = har_response['content'] 36 | body = content.get('text', None) 37 | if body is None: 38 | return None 39 | 
encoding = content.get('encoding', None) 40 | if encoding == 'base64': 41 | return base64.b64decode(body) 42 | if encoding is None or encoding == 'binary': 43 | if not isinstance(body, bytes): 44 | return body.encode('utf8') 45 | return body 46 | else: 47 | raise ValueError("Unsupported HAR content encoding: %r" % encoding) 48 | -------------------------------------------------------------------------------- /splash/jsutils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import json 3 | 4 | 5 | def escape_js(*args): 6 | return json.dumps(args, ensure_ascii=False)[1:-1] 7 | 8 | 9 | # JS function which only allows plain arrays/objects and other primitives 10 | # with a restriction on maximum allowed depth. 11 | # A more natural way would be to use JSON.stringify, 12 | # but user can override global JSON object to bypass protection. 13 | SANITIZE_FUNC_JS = u""" 14 | function (obj, max_depth){ 15 | max_depth = max_depth ? max_depth : 100; 16 | function _s(o, d) { 17 | if (d <= 0) { 18 | throw Error("Object is too deep or recursive"); 19 | } 20 | if (o === null) { 21 | return ""; // this is the way Qt handles it 22 | } 23 | if (typeof o == 'object') { 24 | if (Array.isArray(o)) { 25 | var res = []; 26 | for (var i = 0; i < o.length; i++) { 27 | res[i] = _s(o[i], d-1); 28 | } 29 | return res; 30 | } 31 | else if ( 32 | (Object.getPrototypeOf(o) == Object.prototype) || 33 | (o instanceof CSSStyleDeclaration)) { 34 | var res = {}; 35 | for (var key in o) { 36 | if (o.hasOwnProperty(key)) { 37 | res[key] = _s(o[key], d-1); 38 | } 39 | } 40 | return res; 41 | } 42 | else if (o instanceof Date) { 43 | return o.toJSON(); 44 | } 45 | else if (o instanceof ClientRect) { 46 | return { 47 | top: o.top, left: o.left, bottom: o.bottom, right: o.right, 48 | width: o.width, height: o.height, 49 | }; 50 | } 51 | else if (o instanceof ClientRectList) { 52 | return _s(Array.prototype.slice.call(o)) 53 | } 54 | else if 
(o instanceof NamedNodeMap) { 55 | var nodes = {}; 56 | Array.prototype.forEach.call(o, function(node) { 57 | nodes[node.name.toLowerCase()] = node.value; 58 | }); 59 | return nodes; 60 | } 61 | else if (o instanceof DOMTokenList) { 62 | return _s(Array.prototype.slice.call(o)) 63 | } 64 | else { 65 | // likely host object 66 | return undefined; 67 | } 68 | } 69 | else if (typeof o == 'function') { 70 | return undefined; 71 | } 72 | else { 73 | return o; // native type 74 | } 75 | } 76 | return _s(obj, max_depth); 77 | } 78 | """ 79 | 80 | 81 | def get_sanitized_result_js(expression, max_depth=0): 82 | """ 83 | Return a string with JavaScript code which returns a sanitized result of 84 | the ``expression``: only allow objects/arrays/other primitives are allowed, 85 | and an exception is raised for objects/arrays which are too deep. 86 | 87 | ``expression`` should be a JS string constant (already in quotes) or 88 | any other JS expression. 89 | 90 | Use it to sanitize data which should be returned from 91 | QWebFrame.evaluateJavaScript - Qt5 can go mad if we try to return something 92 | else (objects with circular references, DOM elements, ...). 
93 | """ 94 | return u"({sanitize_func})({expression}, {max_depth})".format( 95 | sanitize_func=SANITIZE_FUNC_JS, 96 | expression=expression, 97 | max_depth=max_depth 98 | ) 99 | 100 | 101 | STORE_DOM_ELEMENTS_JS = u""" 102 | function (elements_storage_name, o) { 103 | var storage = window[elements_storage_name]; 104 | 105 | function storeNode(node) { 106 | var id = storage.getId(); 107 | Object.defineProperty(storage, id, { 108 | configurable: false, 109 | enumerable: false, 110 | writable: false, 111 | value: node, 112 | }); 113 | return id; 114 | } 115 | 116 | if (o instanceof Node) { 117 | var id = storeNode(o); 118 | return { 119 | type: 'Node', 120 | id: id, 121 | } 122 | } 123 | else if (o instanceof NodeList) { 124 | var ids = Array.prototype.slice.call(o).map(storeNode); 125 | return { 126 | type: 'NodeList', 127 | ids: ids, 128 | } 129 | } 130 | return { 131 | type: 'other', 132 | data: o, 133 | }; 134 | } 135 | """ 136 | 137 | 138 | def store_dom_elements(expression, elements_storage_name): 139 | return u"({store_func})('{elements_storage_name}', {expression})".format( 140 | store_func=STORE_DOM_ELEMENTS_JS, 141 | elements_storage_name=elements_storage_name, 142 | expression=expression 143 | ) 144 | 145 | 146 | def get_process_errors_js(expression): 147 | """ 148 | Return JS code which evaluates an ``expression`` and 149 | returns ``{error: false, result: ...}`` if there is no exception 150 | or ``{error: true, errorType: ..., errorMessage: ..., errorRepr: ...}`` 151 | if expression raised an error when evaluating. 
152 | """ 153 | return u""" 154 | (function () { 155 | try { 156 | return { 157 | error: false, 158 | result: %(expression)s, 159 | } 160 | } 161 | catch (e) { 162 | return { 163 | error: true, 164 | errorType: e.name, 165 | errorMessage: e.message, 166 | errorRepr: e.toString(), 167 | }; 168 | } 169 | })() 170 | """ % dict(expression=expression) 171 | -------------------------------------------------------------------------------- /splash/kernel/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /splash/kernel/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | To install the kernel type 4 | 5 | $ python -m splash.kernel install 6 | 7 | To start the kernel, use IPython web interface or run 8 | 9 | $ python -m splash.kernel 10 | 11 | """ 12 | import sys 13 | from .kernel import start, install 14 | 15 | if len(sys.argv) > 1 and sys.argv[1] == 'install': 16 | install() 17 | else: 18 | start() 19 | -------------------------------------------------------------------------------- /splash/kernel/errors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import lupa 3 | 4 | from splash.errors import ScriptError 5 | from splash.lua import parse_error_message 6 | 7 | 8 | def error_repr(e): 9 | """ 10 | Return repr of an exception, for printing as a cell execution result. 
11 | """ 12 | if isinstance(e, (ScriptError, lupa.LuaSyntaxError, lupa.LuaError)): 13 | if isinstance(e, ScriptError): 14 | info = e.args[0] 15 | tp = info['type'] 16 | else: 17 | info = parse_error_message(e.args[0]) 18 | tp = ScriptError.SYNTAX_ERROR 19 | line_num = info.get('line_number', -1) 20 | message = info.get('error', info.get('message')) 21 | return "%s [input]:%s: %s" % (tp, line_num, message) 22 | elif isinstance(e, Exception): 23 | return repr(e) 24 | return ScriptError.UNKNOWN_ERROR 25 | -------------------------------------------------------------------------------- /splash/kernel/inspections.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Inspections for Lua code. 4 | """ 5 | import os 6 | import glob 7 | import json 8 | from splash.kernel.lua_parser import ( 9 | LuaParser, 10 | Standalone, 11 | SplashMethod, 12 | SplashMethodOpenBrace, 13 | SplashAttribute, 14 | ) 15 | 16 | 17 | class Inspector(object): 18 | """ Inspector for Lua code """ 19 | def __init__(self, lua): 20 | self.lua = lua 21 | self.docs = _SplashDocs() 22 | self.parser = LuaParser(lua) 23 | 24 | def parse(self, code, cursor_pos): 25 | return self.parser.parse(code, cursor_pos, allow_inside=True) 26 | 27 | def doc_repr(self, doc): 28 | if not doc.get("signature"): 29 | return doc["content"] 30 | 31 | parts = [doc["signature"]] 32 | if doc.get('short'): 33 | parts += [doc["short"]] 34 | 35 | if doc.get('params'): 36 | parts += ["Parameters:\n\n" + doc["params"]] 37 | 38 | if doc.get('returns'): 39 | parts += ["Returns: " + doc["returns"]] 40 | 41 | if doc.get('async'): 42 | parts += ["Async: " + doc["async"]] 43 | 44 | if doc.get('details'): 45 | parts += [doc["details"]] 46 | 47 | return "\n\n".join(parts) 48 | 49 | def help(self, code, cursor_pos, detail_level): 50 | # from .completer import _pp 51 | 52 | NO_RESULT = { 53 | 'status': 'ok', 54 | 'data': {}, 55 | 'metadata': {}, 56 | 'found': False, 57 | } 
58 | 59 | m = self.parse(code, cursor_pos) 60 | if m is None: 61 | return NO_RESULT 62 | 63 | doc = None 64 | 65 | if isinstance(m, (SplashMethod, SplashMethodOpenBrace)): 66 | doc = self.docs.get("splash:" + m.prefix) 67 | 68 | elif isinstance(m, SplashAttribute): 69 | doc = self.docs.get("splash." + m.prefix) 70 | 71 | elif isinstance(m, Standalone) and m.value == "splash": 72 | doc = self.docs.get("splash") 73 | 74 | if doc is None: 75 | return NO_RESULT 76 | 77 | return { 78 | 'status': 'ok', 79 | 'data': {"text/plain": self.doc_repr(doc)}, 80 | 'metadata': {}, 81 | 'found': True, 82 | } 83 | 84 | 85 | class _SplashDocs(object): 86 | def __init__(self, folder=None): 87 | if folder is None: 88 | folder = os.path.abspath( 89 | os.path.join(os.path.dirname(__file__), "inspections") 90 | ) 91 | 92 | self.info = {} 93 | files = sorted(glob.glob(folder + "/*.json")) 94 | for name in files: 95 | full_name = os.path.join(folder, name) 96 | with open(full_name, "rb") as f: 97 | info = json.loads(f.read().decode('utf-8')) 98 | self.info.update(info) 99 | 100 | def __getitem__(self, item): 101 | return self.info[item] 102 | 103 | def get(self, key, default=None): 104 | return self.info.get(key, default) 105 | -------------------------------------------------------------------------------- /splash/kernel/inspections/README.rst: -------------------------------------------------------------------------------- 1 | A folder with inspection data for Splash IPython kernel. 2 | 3 | :file:`splash-auto.json` is generated from reference documentation using 4 | :file:`scripts/rst2inspections.py` script. 5 | -------------------------------------------------------------------------------- /splash/kernel/inspections/splash.json: -------------------------------------------------------------------------------- 1 | { 2 | "splash":{ 3 | "content": "An object which controls the web browser window." 
4 | } 5 | } 6 | -------------------------------------------------------------------------------- /splash/kernel/kernels/splash-py2/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": ["python", "-m", "splash.kernel", "-f", "{connection_file}"], 3 | "display_name": "Splash", 4 | "name": "Splash", 5 | "language": "lua" 6 | } 7 | -------------------------------------------------------------------------------- /splash/kernel/kernels/splash-py2/logo-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/splash/ab28b0233c245461189881f1f5656b96371a4b40/splash/kernel/kernels/splash-py2/logo-32x32.png -------------------------------------------------------------------------------- /splash/kernel/kernels/splash-py2/logo-64x64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/splash/ab28b0233c245461189881f1f5656b96371a4b40/splash/kernel/kernels/splash-py2/logo-64x64.png -------------------------------------------------------------------------------- /splash/kernel/kernels/splash-py3/kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "argv": ["python3", "-m", "splash.kernel", "-f", "{connection_file}"], 3 | "display_name": "Splash", 4 | "language": "lua" 5 | } 6 | -------------------------------------------------------------------------------- /splash/kernel/kernels/splash-py3/logo-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/splash/ab28b0233c245461189881f1f5656b96371a4b40/splash/kernel/kernels/splash-py3/logo-32x32.png -------------------------------------------------------------------------------- /splash/kernel/kernels/splash-py3/logo-64x64.png: 
class SplashLogger:
    """
    Logger used by Splash rendering code.

    Messages are prefixed with the ``uid`` passed to the constructor and
    sent to ``twisted.python.log`` under the 'render' system.

    XXX: should we just switch to stdlib logging?
    """
    def __init__(self, uid, verbosity: int) -> None:
        self.verbosity = verbosity
        self.uid = uid

    def log(self, message, min_level: int = None) -> None:
        """
        Log ``message`` unless ``min_level`` is given and the logger
        verbosity is below it.
        """
        suppressed = min_level is not None and self.verbosity < min_level
        if suppressed:
            return

        text = message
        if isinstance(text, str):
            # keep the log ASCII-only: non-ASCII and control characters
            # are written as escape sequences
            text = text.encode('unicode-escape').decode('ascii')

        log.msg("[%s] %s" % (self.uid, text), system='render')


class DummyLogger:
    """ A logger which discards everything; a stand-in for a real logger. """
    def log(self, *args, **kwargs):
        return None
--
-- Lua autocompletion utilities for IPython kernel
--
local lexer = require('vendor/lexer')
local completer = {}


--
-- Tokenize Lua source code.
-- Returns an array of {tp=<token type>, value=<token value>} tables,
-- one per token produced by the vendored lexer.
--
function completer.tokenize(src)
  local res = {}
  -- NOTE(review): presumably `filter` lists token kinds to skip and
  -- `options` asks the lexer to convert number/string tokens to values;
  -- confirm against vendor/lexer.lua.
  local filter = {space=true, comments=true}
  local options = {number=true, string=true}

  for tp, value in lexer.lua(src, filter, options) do
    res[#res+1] = {tp=tp, value=value}
  end
  return res
end


--
-- Return all string table keys for which values passes `value_ok` test.
-- `value_ok` is called as value_ok(key, value).
--
function completer.get_table_keys(tbl, value_ok)
  local res = {}
  for k, v in pairs(tbl) do
    if type(k) == "string" and value_ok(k, v) then
      res[#res+1] = k
    end
  end
  return res
end


--
-- Return all string metatable.__index keys with values passing `value_ok` test.
-- An empty table is returned if there is no (accessible) metatable.
--
function completer.get_metatable_keys(obj, value_ok)
  local mt = getmetatable(obj)
  if type(mt) ~= 'table' then return {} end
  local index = mt.__index
  if type(index) == 'table' then
    return completer.get_table_keys(index, value_ok)
  elseif type(index) == 'function' then
    -- Assume index function eventually gets values from metatable itself.
    -- This is not always correct, but that's better than nothing.
    return completer.get_table_keys(mt, value_ok)
  else
    return {}
  end
end


--
-- Return all attribute names of an object.
-- If `no_methods` is true then function values are excluded;
-- if `only_methods` is true then only function values are returned.
-- Only strings and tables are supported; an empty table is returned
-- for values of other types.
--
function completer.obj_attrs(obj, no_methods, only_methods)
  local tp = type(obj)

  local function value_ok(k, v)
    local is_meth = type(v) == 'function'
    if is_meth and no_methods then return false end
    if not is_meth and only_methods then return false end
    return true
  end

  if tp == "string" then
    return completer.get_metatable_keys(obj, value_ok)
  end

  if tp == "table" then
    local keys = completer.get_table_keys(obj, value_ok)
    local mt_keys = completer.get_metatable_keys(obj, value_ok)
    for _, k in ipairs(mt_keys) do
      keys[#keys+1] = k
    end
    return keys
  end

  -- nil and all other types (numbers, functions, userdata, ...)
  return {}
end

--
-- Return an object given its lookup names chain.
-- The lookup starts from globals. nil is returned when the chain can't be
-- followed (an intermediate value is undefined or can't be indexed);
-- an error is raised only for an empty chain.
--
function completer.resolve_obj(names_chain)
  if #names_chain == 0 then
    error("invalid attributes chain")
  end

  local obj = _G
  for _, attr in ipairs(names_chain) do
    -- Indexing nil, numbers or booleans raises an error; completion
    -- should treat such chains as "nothing to complete".
    local ok, value = pcall(function() return obj[attr] end)
    if not ok then
      return nil
    end
    obj = value
  end

  return obj
end

--
-- Return all attribute names of a global variable or its attribute,
-- resolving names lookup chain.
--
function completer.attrs(names_chain, no_methods, only_methods)
  local obj = completer.resolve_obj(names_chain)
  return completer.obj_attrs(obj, no_methods, only_methods)
end


return completer
--
-- A wrapper for Element objects
--
local wraputils = require("wraputils")


local Element = wraputils.create_metatable()

function Element._create(py_element)
  local element = {}
  return wraputils.wrap_exposed_object(py_element, element, Element)
end

--
-- Public version of the private _node_method: on success the returned
-- function is passed through wraputils.unwraps_python_result;
-- on failure (ok is false) the values are returned as-is.
--
function Element:node_method(...)
  local ok, func = self:_node_method(...)

  if not ok then
    return ok, func
  end

  return ok, wraputils.unwraps_python_result(func, 2)
end


--
-- Public version of the private _node_property: results flagged
-- as elements are wrapped to Element objects.
--
function Element:node_property(...)
  local ok, result, is_element = self:_node_property(...)

  if not ok then
    return ok, result
  end

  if is_element then
    return true, Element._create(result)
  end

  return true, result
end

function Element:to_table()
  return { type = 'node', id = self.inner_id }
end


--
-- A wrapper for element.style: reads and writes of any field are
-- forwarded to _get_style / _set_style.
--
local ElementStyle = wraputils.create_metatable()

ElementStyle.__index = function(self, index)
  return self:_get_style(index)
end

ElementStyle.__newindex = function(self, index, value)
  return self:_set_style(index, value)
end

function ElementStyle._create(py_element_style)
  local element_style = {}
  return wraputils.wrap_exposed_object(py_element_style, element_style, ElementStyle)
end


-- These helpers are local on purpose: a module shouldn't add names
-- to the global namespace.

-- Return true if `str` is a string which starts with "on" (e.g. "onclick").
local function is_event_name(str)
  return type(str) == 'string' and string.sub(str, 1, 2) == 'on'
end

-- Strip the leading "on" from an event attribute name.
local function get_event_name(str)
  return string.sub(str, 3, string.len(str))
end

local element_index = Element.__index
local element_newindex = Element.__newindex

--
-- Attribute reads: `node` is the element itself, `style` is a wrapped
-- style object, event handlers ("on...") are write-only; everything else
-- is handled by the original __index.
--
Element.__index = function(self, index)
  if index == 'node' then
    return self
  end

  if index == 'style' then
    local py_element_style = self:_get_style()
    return ElementStyle._create(py_element_style)
  end

  if is_event_name(index) then
    local event_name = get_event_name(index)
    error("Cannot get " .. event_name .. " event handler")
  end

  return element_index(self, index)
end

--
-- Attribute writes: `node` is read-only, "on..." attributes set event
-- handlers; everything else is handled by the original __newindex.
--
Element.__newindex = function(self, index, value)
  if index == 'node' then
    error("Cannot set node field of the element")
  end

  if is_event_name(index) then
    local event_name = get_event_name(index)
    return self:_set_event_handler(event_name, value)
  end

  return element_newindex(self, index, value)
end

return Element
--
-- JSON encoding and decoding for Lua scripts.
-- The work is delegated to the __extras object.
--
local json = {}

-- Encode `data` to JSON using __extras:json_encode.
function json.encode(data)
  return __extras:json_encode(data)
end


-- Decode a JSON string using __extras:json_decode.
-- An error is raised if `data` is not a string.
function json.decode(data)
  if type(data) ~= 'string' then
    -- level 2: report the error at the caller's position, the same way
    -- base64 and treat libraries do
    error("json.decode argument must be a string", 2)
  end

  return __extras:json_decode(data)
end

return json
local treat = {}

local wraputils = require("wraputils")

--
-- Mark a string as binary. It means it no longer
-- can be processed from Lua, but it can be
-- returned as a main() result as-is.
--
-- Binary objects are also auto-encoded to base64 when
-- encoding to JSON.
--
function treat.as_binary(s, content_type)
  if type(s) ~= 'userdata' and type(s) ~= 'string' then
    error("as_binary argument must be a string or a binary object", 2)
  end
  return __extras:treat_as_binary(s, content_type)
end


--
-- Get original string value and a content type of
-- a binary object created by treat.as_binary or
-- returned by one of Splash methods.
-- Values which are not userdata are converted using tostring.
--
function treat.as_string(s)
  if type(s) ~= 'userdata' then
    return tostring(s)
  end
  return __extras:treat_as_string(s)
end


--
-- Mark a Lua table as an array. Such tables
-- will be represented as arrays when encoded to JSON.
-- This function modifies its argument inplace.
--
function treat.as_array(tbl)
  -- the same function is available in
  -- Splash Python code as lua._mark_table_as_array
  if type(tbl) ~= 'table' or wraputils.is_wrapped(tbl) then
    error('as_array argument must be a table', 2)
  end
  setmetatable(tbl, {__metatable="array"})
  return tbl
end


-- Lowercase string keys; keys of other types (e.g. numbers) are returned
-- unchanged, as they have no :lower() method.
local function fold_case(key)
  if type(key) == 'string' then
    return key:lower()
  end
  return key
end


--
-- Make keys in a Lua table case-insensitive.
-- A copy of the table is returned; the argument is not modified.
-- Keys are stored in the copy with their original case; lookups of keys
-- which are not stored as-is fall back to a lowercased index.
--
function treat.as_case_insensitive(tbl)
  local copy = {}
  local lowercase_copy = {}
  for k, v in pairs(tbl) do
    copy[k] = v
    lowercase_copy[fold_case(k)] = v
  end

  local mt = {
    __index = function(_, key)
      return lowercase_copy[fold_case(key)]
    end,
    __newindex = function(self, key, value)
      rawset(self, key, value)
      lowercase_copy[fold_case(key)] = value
    end
  }
  setmetatable(copy, mt)
  return copy
end


return treat
--
-- A wrapper for Request objects returned by Splash
--
local wraputils = require("wraputils")
local treat = require("libs/treat")

local Request = wraputils.create_metatable()

--
-- Build a Lua request from a Python request: info, url and method are
-- copied as-is, headers are made case-insensitive.
--
function Request._create(py_request)
  local fields = {}
  fields.info = py_request.info
  fields.headers = treat.as_case_insensitive(py_request.headers)
  fields.url = py_request.url
  fields.method = py_request.method
  return wraputils.wrap_exposed_object(py_request, fields, Request)
end

return Request
--
-- A module for creating Lua 'splash' object
-- from a Python 'splash' object.
--

local wraputils = require("wraputils")
local Response = require("response")
local Request = require("request")
local Element = require("element")

--
-- Lua wrapper for Splash Python object.
--
local Splash = wraputils.create_metatable()


function Splash._create(py_splash)
  local wrapper = { args = py_splash.args }
  return wraputils.wrap_exposed_object(py_splash, wrapper, Splash)
end

--
-- Public jsfunc is built on top of private _jsfunc;
-- the indirection is required to handle errors properly.
--
function Splash:jsfunc(...)
  local py_func = self:_jsfunc(...)
  return wraputils.unwraps_python_result(py_func)
end

--
-- Event subscriptions. User callbacks receive wrapped
-- `request` / `response` objects, not raw Python ones.
--
function Splash:on_request(callback)
  if type(callback) ~= 'function' then
    error("splash:on_request callback is not a function", 2)
  end
  self:_on_request(function(py_request)
    return callback(Request._create(py_request))
  end)
end

function Splash:on_response_headers(callback)
  if type(callback) ~= 'function' then
    error("splash:on_response_headers callback is not a function", 2)
  end
  self:_on_response_headers(function(py_response)
    return callback(Response._create(py_response))
  end)
end

function Splash:on_response(callback)
  if type(callback) ~= 'function' then
    error("splash:on_response callback is not a function", 2)
  end
  self:_on_response(function(py_response)
    return callback(Response._create(py_response))
  end)
end

function Splash:on_navigation_locked(callback)
  if type(callback) ~= 'function' then
    error("splash:on_navigation_locked callback is not a function", 2)
  end
  self:_on_navigation_locked(function(py_request)
    return callback(Request._create(py_request))
  end)
end


--
-- Timer Lua wrapper
--
local Timer = wraputils.create_metatable()

function Timer._create(py_timer)
  local wrapper = {}
  return wraputils.wrap_exposed_object(py_timer, wrapper, Timer)
end

-- Schedule a callback using private _call_later; return a wrapped timer.
function Splash:call_later(callback, delay)
  return Timer._create(self:_call_later(callback, delay))
end

return Splash
class SplashLuaRuntime(object):
    """
    Lua runtime wrapper, optionally with a sandbox.

    Python objects are only accessible from Lua if they are explicitly
    allowed (see :meth:`add_allowed_object`), and only whitelisted
    attributes can be read; writing to Python objects is never allowed.
    """
    def __init__(self, sandboxed, lua_package_path, lua_sandbox_allowed_modules):
        """
        :param bool sandboxed: whether the runtime should be sandboxed
        :param str lua_package_path: paths to add to Lua package.path
        :param iterable lua_sandbox_allowed_modules: a list of modules allowed
            to be required from a sandbox
        """
        self._sandboxed = sandboxed
        # Created before the runtime: the attribute getter registered in
        # _create_runtime reads this mapping.
        self._allowed_object_attrs = weakref.WeakKeyDictionary()
        self._lua = self._create_runtime(lua_package_path)
        self._setup_lua_sandbox(lua_sandbox_allowed_modules)

    def add_to_globals(self, name, value):
        """ Assign ``value`` to a Lua global variable ``name``. """
        code = "function(%s_) %s = %s_ end" % (name, name, name)
        self.eval(code)(value)

    def table_from(self, *args, **kwargs):
        return self._lua.table_from(*args, **kwargs)

    def eval(self, *args, **kwargs):
        return self._lua.eval(*args, **kwargs)

    def execute(self, *args, **kwargs):
        return self._lua.execute(*args, **kwargs)

    def globals(self, *args, **kwargs):
        return self._lua.globals(*args, **kwargs)

    def add_allowed_object(self, obj, attr_whitelist):
        """ Add a Python object to a list of objects the runtime can access """
        self._allowed_object_attrs[obj] = attr_whitelist

    def remove_allowed_object(self, obj):
        """
        Remove an object from a list of objects the runtime can access.
        It is not an error if the object was not allowed.
        """
        self._allowed_object_attrs.pop(obj, None)

    def add_allowed_module(self, name):
        """ Allow to require specified module from Lua """
        self._sandbox[b"allowed_require_names"][name.encode('utf8')] = True

    @contextlib.contextmanager
    def object_allowed(self, obj, attr_whitelist):
        """ Temporarily enable an access to a Python object """
        self.add_allowed_object(obj, attr_whitelist)
        try:
            yield
        finally:
            self.remove_allowed_object(obj)

    def lua2python(self, *args, **kwargs):
        """ Like ``splash.lua.lua2python``, with utf8 / strict defaults. """
        kwargs.setdefault("encoding", 'utf8')
        kwargs.setdefault("strict", True)
        return lua2python(self._lua, *args, **kwargs)

    def python2lua(self, *args, **kwargs):
        return python2lua(self._lua, *args, **kwargs)

    def instruction_count(self):
        """
        Return the instruction count reported by the sandbox;
        -1 is returned if the runtime is not sandboxed or if
        the value can't be read.
        """
        if not self._sandboxed:
            return -1
        try:
            return self._sandbox.instruction_count
        except Exception as e:
            # best-effort: this value is informational only
            print(e)
            return -1

    def create_coroutine(self, func):
        """
        Return a Python object which starts a coroutine when called.
        """
        if self._sandboxed:
            return self._sandbox.create_coroutine(func)
        else:
            return func.coroutine

    def _create_runtime(self, lua_package_path):
        """
        Return a restricted Lua runtime.
        Currently it only allows accessing attributes of this object.
        """
        attribute_handlers = (self._attr_getter, self._attr_setter)
        runtime = get_new_runtime(attribute_handlers=attribute_handlers)
        self._setup_lua_paths(runtime, lua_package_path)
        return runtime

    def _setup_lua_paths(self, lua, lua_package_path):
        """
        Prepend Splash Lua modules folders (and ``lua_package_path``,
        if it is not empty) to Lua ``package.path``.
        """
        root = os.path.join(os.path.dirname(__file__), 'lua_modules')
        at_root = lambda *p: os.path.abspath(os.path.join(root, *p))
        default_path = "{root}/?.lua;{libs}/?.lua".format(
            root=at_root(),
            libs=at_root('libs')
        )
        if lua_package_path:
            packages_path = ";".join([default_path, lua_package_path])
        else:
            packages_path = default_path

        # The path is embedded into a double-quoted Lua string literal, so
        # backslashes (e.g. Windows paths), quotes and newlines must be
        # escaped, otherwise the generated Lua code is invalid.
        lua_literal = (
            packages_path
            .replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
        )
        lua.execute("""
            package.path = "{packages_path};" .. package.path
        """.format(packages_path=lua_literal))

    @property
    def _sandbox(self):
        return self.eval("require('sandbox')")

    def _setup_lua_sandbox(self, allowed_modules):
        for name in allowed_modules:
            self.add_allowed_module(name)

    def _attr_getter(self, obj, attr_name):
        """
        Attribute getter for Python objects accessed from Lua: only
        whitelisted attributes of allowed objects can be read.
        """
        try:
            attr_name = to_unicode(attr_name)
        except TypeError:
            raise AttributeError("Non-string lookups are not allowed (requested: %r)" % attr_name)

        if obj not in self._allowed_object_attrs:
            raise AttributeError("Access to object %r is not allowed" % obj)

        if attr_name not in self._allowed_object_attrs[obj]:
            raise AttributeError("Access to private attribute %r is not allowed" % attr_name)

        value = getattr(obj, attr_name)
        return value

    def _attr_setter(self, obj, attr_name, value):
        """ Attribute setter for Python objects accessed from Lua: always fails. """
        raise AttributeError("Direct writing to Python objects is not allowed")
class RenderPool(object):
    """A pool of renders. The number of slots determines how many
    renders will be run in parallel, at the most.

    Render requests are put to a queue; each slot repeatedly takes
    the next request from the queue and runs it."""

    def __init__(self, slots,
                 network_manager_factory,
                 splash_proxy_factory_cls,
                 js_profiles_path,
                 verbosity=1):
        self.network_manager_factory = network_manager_factory
        # if no factory class is passed then proxy factory is always None
        self.splash_proxy_factory_cls = splash_proxy_factory_cls or (lambda profile_name: None)
        self.js_profiles_path = js_profiles_path
        # render objects which are currently running
        self.active = set()
        self.queue = defer.DeferredQueue()
        self.verbosity = verbosity
        # make each slot wait for work; nothing is logged for the initial wait
        for n in range(slots):
            self._wait_for_render(None, n, log=False)

    def render(self, rendercls, render_options, proxy, **kwargs):
        """
        Queue a render request. Return a Deferred (``pool_d``) which is
        chained to the render's own deferred once the render is created.
        ``kwargs`` are passed to ``render.start``.
        """
        splash_proxy_factory = self.splash_proxy_factory_cls(proxy)
        pool_d = defer.Deferred()
        slot = SlotArguments(
            rendercls=rendercls,
            render_options=render_options,
            splash_proxy_factory=splash_proxy_factory,
            kwargs=kwargs,
            pool_d = pool_d,
        )
        self.queue.put(slot)
        self.log("[%s] queued" % render_options.get_uid())
        return pool_d

    def _wait_for_render(self, _, slot, log=True):
        """
        Make slot ``slot`` wait for the next queued request.
        It is used as a Deferred callback; the first argument (a previous
        result or failure) is returned unchanged.
        """
        if log:
            self.log("SLOT %d is available" % slot)
        d = self.queue.get()
        d.addCallback(self._start_render, slot)
        # _start_render returns render.deferred, so (per Twisted Deferred
        # chaining) this fires only after the render is finished or failed;
        # the slot then starts waiting for the next request.
        d.addBoth(self._wait_for_render, slot)
        return _

    def _start_render(self, slot_args: SlotArguments, slot):
        """
        Create a render object for a queued request and start it.
        Return the render's deferred.
        """
        self.log("initializing SLOT %d" % (slot, ))
        # FIXME: refactor. network manager only works for webkit.
        render = slot_args.rendercls(
            render_options=slot_args.render_options,
            verbosity=self.verbosity,
            network_manager=self.network_manager_factory(),
            splash_proxy_factory=slot_args.splash_proxy_factory,
        )
        self.active.add(render)
        # Pass the render outcome to the deferred returned by render();
        # errors are logged first, then the render is closed in any case.
        render.deferred.chainDeferred(slot_args.pool_d)
        slot_args.pool_d.addErrback(self._error, render, slot)
        slot_args.pool_d.addBoth(self._close_render, render, slot)

        self.log("[%s] SLOT %d is starting" % (
            slot_args.render_options.get_uid(), slot))
        try:
            render.start(**slot_args.kwargs)
        except:
            # errback() without arguments: presumably Twisted builds
            # a Failure from the exception being handled - TODO confirm
            render.deferred.errback()
            raise
        self.log("[%s] SLOT %d is working" % (
            slot_args.render_options.get_uid(), slot))

        return render.deferred

    def _error(self, failure, render, slot):
        """ Errback: log the failure and pass it further unchanged. """
        uid = render.render_options.get_uid()
        self.log("[%s] SLOT %d finished with an error %s: %s" % (uid, slot, render, failure))
        return failure

    def _close_render(self, _, render, slot):
        """
        Callback/errback: unregister and close a finished render.
        The result (or failure) is passed further unchanged.
        """
        uid = render.render_options.get_uid()
        self.log("[%s] SLOT %d is closing %s" % (uid, slot, render))
        self.active.remove(render)
        render.deferred.cancel()
        render.close()
        self.log("[%s] SLOT %d done with %s" % (uid, slot, render))
        return _

    def log(self, text):
        """ Log a message; messages are only emitted if verbosity >= 2. """
        if self.verbosity >= 2:
            log.msg(text, system='pool')
class BaseRenderScript(metaclass=abc.ABCMeta):
    """
    Common contract of all render scripts.

    A render script reports its outcome through ``self.deferred``:
    :meth:`return_result` fires it with a result, :meth:`return_error`
    fires it with an error.
    """
    default_min_log_level = 2
    tab = None  # subclasses should assign self.tab in their __init__

    @abc.abstractmethod
    def __init__(self, render_options: RenderOptions,
                 verbosity: int, **kwargs) -> None:
        """
        BaseRenderScript.__init__ is called by Pool.
        """
        self.verbosity = verbosity
        self.render_options = render_options

        # fired with the render result as soon as the result is ready
        self.deferred = defer.Deferred()

    @abc.abstractmethod
    def start(self, **kwargs):
        """
        Begin the script; called by Pool. Implementations must make sure
        self.deferred is eventually fired - usually via self.return_result
        or self.return_error.
        """

    def log(self, text, min_level=None):
        """ Log ``text`` using the tab logger; default level is class-wide. """
        level = self.default_min_log_level if min_level is None else min_level
        self.tab.logger.log(text, min_level=level)

    def return_result(self, result):
        """ Hand a result over to the Pool. """
        already_returned = self._result_already_returned()
        if already_returned:
            self.tab.logger.log("error: result is already returned", min_level=1)
        self.deferred.callback(result)

    def return_error(self, error):
        """ Hand an error over to the Pool. """
        already_returned = self._result_already_returned()
        if already_returned:
            self.tab.logger.log("error: result is already returned", min_level=1)
        self.deferred.errback(error)

    def _result_already_returned(self):
        """ Tell whether a result or an error was already passed to Pool. """
        return self.deferred.called

    def close(self):
        """
        Release the tab. Pool calls this method when the rendering is
        over and this RenderScript object is not needed anymore.
        """
        self.tab.close()
class ContentTypeMiddleware(object):
    """
    Response middleware, aborts responses depending on the content type.
    A response will be aborted (and the underlying connection closed) after
    receiving the response headers if the content type of the response is not
    in the whitelist or it's in the blacklist. Both lists support wildcards.
    Content types are compared case-insensitively.
    """
    def __init__(self, verbosity=0):
        self.verbosity = verbosity

    @staticmethod
    def contains(mime_set, mime):
        """
        Return True if ``mime`` matches any of the (wildcard) patterns
        from ``mime_set``. Matching is case-insensitive.

        >>> ContentTypeMiddleware.contains({'*/*'}, 'any/thing')
        True
        >>> ContentTypeMiddleware.contains(set(), 'any/thing')
        False
        >>> ContentTypeMiddleware.contains({'text/css', 'image/*'}, 'image/png')
        True
        >>> ContentTypeMiddleware.contains({'*'}, 'any-thing')
        True
        >>> ContentTypeMiddleware.contains({'text/html'}, 'Text/HTML')
        True
        """
        # fnmatch.fnmatch is case-sensitive or not depending on the OS;
        # media types are case-insensitive, so the case is normalized
        # explicitly and a platform-independent fnmatchcase is used.
        mime = mime.lower()
        return any(
            fnmatch.fnmatchcase(mime, pattern.lower())
            for pattern in mime_set
        )

    @staticmethod
    def clean_mime(mime):
        """
        Remove attributes from a mime string:

        >>> ContentTypeMiddleware.clean_mime(' text/html; charset=utf-8\t ')
        'text/html'
        >>> ContentTypeMiddleware.clean_mime(';charset=utf-8')
        ''
        """
        # everything starting from the first ';' is parameters,
        # even if ';' is the very first character
        return mime.partition(';')[0].strip()

    def process(self, reply, render_options):
        """
        Abort ``reply`` if its content type is forbidden or is not allowed
        by ``render_options``. Replies without a Content-Type header
        are not touched.
        """
        content_type = reply.header(QNetworkRequest.ContentTypeHeader)
        if content_type is None:
            return

        mimetype = self.clean_mime(content_type)
        allowed = render_options.get_allowed_content_types()
        forbidden = render_options.get_forbidden_content_types()
        whitelist = set(map(ContentTypeMiddleware.clean_mime, allowed))
        blacklist = set(map(ContentTypeMiddleware.clean_mime, forbidden))

        if self.contains(blacklist, mimetype) or not self.contains(whitelist, mimetype):
            if self.verbosity >= 2:
                request_str = request_repr(reply, reply.operation())
                msg = "Dropping %s because of Content Type" % request_str
                log.msg(msg, system='response_middleware')
            reply.abort()
# NOTE: ``pytest.yield_fixture`` is deprecated; a plain ``pytest.fixture``
# supports ``yield``-style teardown, so it is used for all fixtures below.

@pytest.fixture(scope="session")
def splash_unrestricted():
    """ Session-wide Splash server started with ``--disable-lua-sandbox`` """
    with SplashServer(extra_args=['--disable-lua-sandbox']) as splash:
        yield splash


@pytest.fixture(scope="class")
def class_splash_unrestricted(request, splash_unrestricted):
    """ Non-sandboxed Splash server """
    request.cls.splash_unrestricted = splash_unrestricted
    yield splash_unrestricted


@pytest.fixture(scope="session")
def splash_strict_lua_runner():
    """ Session-wide Splash server started with ``--strict-lua-runner`` """
    with SplashServer(extra_args=['--strict-lua-runner']) as splash:
        yield splash


@pytest.fixture(scope="class")
def class_splash_strict_lua_runner(request, splash_strict_lua_runner):
    """ Splash server with additional internal checks for Lua scripts """
    request.cls.splash_strict_lua_runner = splash_strict_lua_runner
    yield splash_strict_lua_runner


@pytest.fixture()
def lua(request):
    """ Fresh ``lupa.LuaRuntime``; also stored as ``lua`` on the test class """
    # imported lazily so that collecting tests doesn't require lupa
    import lupa
    lua = lupa.LuaRuntime(encoding=None)
    request.cls.lua = lua
    return lua


@pytest.fixture()
def configured_lua():
    """ Non-sandboxed ``SplashLuaRuntime`` with an empty package path """
    from splash.lua_runtime import SplashLuaRuntime
    return SplashLuaRuntime(
        sandboxed=False,
        lua_package_path="",
        lua_sandbox_allowed_modules=()
    )


@pytest.fixture()
def completer(configured_lua):
    """ ``Completer`` built on top of ``configured_lua`` """
    from splash.kernel.completer import Completer
    return Completer(configured_lua)


@pytest.fixture()
def lua_lexer(configured_lua):
    """ ``LuaLexer`` built on top of ``configured_lua`` """
    from splash.kernel.lua_parser import LuaLexer
    return LuaLexer(configured_lua)


@pytest.fixture()
def inspector(configured_lua):
    """ ``Inspector`` built on top of ``configured_lua`` """
    from splash.kernel.inspections import Inspector
    return Inspector(configured_lua)
| ^script.js|$domain=localhost 3 | ^script.js|$domain=127.0.0.1 4 | ^script2.js|$domain=example.com 5 | -------------------------------------------------------------------------------- /splash/tests/filters/noscript2.txt: -------------------------------------------------------------------------------- 1 | ^script2.js| 2 | -------------------------------------------------------------------------------- /splash/tests/js_profiles/test/lib1.js: -------------------------------------------------------------------------------- 1 | // 2 | // Javascript test library 3 | // 4 | function test(x){ 5 | return x; 6 | } 7 | -------------------------------------------------------------------------------- /splash/tests/js_profiles/test/lib2.js: -------------------------------------------------------------------------------- 1 | // 2 | // Javascript test library 3 | // 4 | function test2(x){ 5 | // Calls a function defined in another lib of the same js profile 6 | return test(x) + test(x); 7 | } 8 | -------------------------------------------------------------------------------- /splash/tests/js_profiles/test/lib_utf8.js: -------------------------------------------------------------------------------- 1 | // 2 | // Javascript test library 3 | // 4 | function test_utf8(x){ 5 | return x + '®'; 6 | } 7 | -------------------------------------------------------------------------------- /splash/tests/lua_modules/emulation.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- This module provides render_har, render_html and render_png methods 3 | -- which emulate render.har, render.html and render.png endpoints. 4 | -- They are used in tests; behaviour is not 100% the same. 5 | -- 6 | 7 | local Splash = require("splash") 8 | 9 | -- 10 | -- A method with a common workflow: go to a page, wait for some time. 11 | -- splash.qtrender.WebkitDefaultRenderScript implements a similar logic in Python. 
--
function Splash:go_and_wait(args)
    -- content-type for error messages. Callers should set their
    -- own content-type before returning the result.
    self:set_result_content_type("text/plain; charset=utf-8")

    -- prepare & validate arguments
    args = args or self.args
    local url = args.url
    if not url then
        error("'url' argument is required")
    end
    local wait = tonumber(args.wait)
    if not wait and (self.args.render_all or self.args.viewport == "full") then
        error("non-zero 'wait' is required when rendering whole page")
    end
    if args.http2 ~= nil then
        self.http2_enabled = tonumber(args.http2)
    end

    self.images_enabled = self.args.images

    -- if viewport is 'full' it should be set only after waiting
    if args.viewport ~= nil and args.viewport ~= "full" then
        local w, h = string.match(args.viewport, '^(%d+)x(%d+)')
        if w == nil or h == nil then
            error('Invalid viewport size format: ' .. args.viewport)
        end
        self:set_viewport_size(tonumber(w), tonumber(h))
    end

    -- set a resource timeout
    self.resource_timeout = args.resource_timeout
    self.response_body_enabled = args.response_body
    self.request_body_enabled = args.request_body

    local ok, reason = self:go{url=url, baseurl=args.baseurl}
    if not ok then
        -- render.xxx endpoints don't return HTTP errors as errors,
        -- so here we also raise an exception only if the error is not
        -- caused by a 4xx or 5xx HTTP response.
        if reason == 'render_error' or reason == 'error' then
            self:set_result_status_code(502)
        elseif string.sub(reason, 1, 4) ~= 'http' then
            -- Lua strings are 1-indexed; the first four characters are
            -- compared with 'http'.
            error(reason)
        end
    end

    assert(self:wait_restart_on_redirects(wait, 10))
end


--
-- Wait for `time` seconds, restarting the wait each time it is cancelled
-- by a redirect. Returns what splash:wait returned for the first wait that
-- was not cancelled by a redirect; raises an error after `max_redirects`
-- redirect-cancelled waits. Returns true immediately when `time` is not set.
--
function Splash:wait_restart_on_redirects(time, max_redirects)
    if not time then
        return true
    end

    -- Every number (including 0 and negative values) is truthy in Lua, so
    -- the counter has to be compared explicitly; `while redirects_remaining do`
    -- would never terminate on an endless redirect chain.
    local redirects_remaining = max_redirects or 0
    while redirects_remaining > 0 do
        local ok, reason = self:wait{time, cancel_on_redirect=true}
        if reason ~= 'redirect' then
            return ok, reason
        end
        redirects_remaining = redirects_remaining - 1
    end
    error("Maximum number of redirects happen")
end


--
-- "Endpoints"
--

local emulation = {}


-- Emulates render.har: load the page, then return its HAR data.
function emulation.render_har(splash)
    splash:go_and_wait(splash.args)
    return splash:har()
end


-- Emulates render.html: load the page, then return its HTML.
function emulation.render_html(splash)
    splash:go_and_wait(splash.args)
    splash:set_result_content_type("text/html; charset=utf-8")
    return splash:html()
end


-- Emulates render.png: load the page, then return a PNG screenshot.
function emulation.render_png(splash)
    splash:go_and_wait(splash.args)
    splash:set_result_content_type("image/png")
    local render_all = (splash.args.render_all or
                        splash.args.viewport == "full")
    return splash:png{
        width=splash.args.width,
        height=splash.args.height,
        render_all=render_all,
        scale_method=splash.args.scale_method,
    }
end


-- Emulates render.jpeg: load the page, then return a JPEG screenshot.
function emulation.render_jpeg(splash)
    splash:go_and_wait(splash.args)
    splash:set_result_content_type("image/jpeg")
    local render_all = (splash.args.render_all or
                        splash.args.viewport == "full")
    return splash:jpeg{
        width=splash.args.width,
        height=splash.args.height,
        render_all=render_all,
        scale_method=splash.args.scale_method,
        quality=splash.args.quality,
    }
end


return emulation
class ProxyClient(proxy.ProxyClient):
    """ Proxy client which marks response bodies that went through the proxy """

    def handleResponsePart(self, buffer):
        # NOTE(review): the search pattern was an empty bytes literal here,
        # which makes bytes.replace() insert the marker between every byte.
        # It looks like an HTML tag was lost; the marker is assumed to be
        # meant to go right before the closing body tag - confirm.
        buffer = buffer.replace(b'</body>', b' PROXY_USED</body>')
        proxy.ProxyClient.handleResponsePart(self, buffer)


class ProxyClientFactory(proxy.ProxyClientFactory):
    protocol = ProxyClient


class ProxyRequest(proxy.ProxyRequest):
    protocols = {b'http': ProxyClientFactory}


class Proxy(proxy.Proxy):
    requestFactory = ProxyRequest


class ProxyFactory(http.HTTPFactory):
    protocol = Proxy


class AuthProxyRequest(proxy.ProxyRequest):
    """
    Proxy request which requires HTTP Basic proxy authentication.

    The expected user name is read from the factory (``valid_user``);
    the expected password is ``valid_password``.
    """
    protocols = {b'http': ProxyClientFactory}
    valid_password = b"splash"

    def process(self):
        headers = self.getAllHeaders()
        auth = headers.get(b'proxy-authorization')
        valid_user = self.transport.protocol.factory.valid_user.encode("utf-8")

        if not auth:
            self.reject_request()
            return

        try:
            _, auth_string = auth.split()
            user, password = base64.b64decode(auth_string).split(b":", 1)
        except ValueError:
            # Malformed header: wrong number of parts, invalid base64
            # (binascii.Error is a ValueError subclass) or no ":" separator.
            # Answer with 407 instead of failing with an unhandled exception.
            self.reject_request()
            return

        if user != valid_user or password != self.valid_password:
            self.reject_request()
            return

        # can't use super() because old style classes
        proxy.ProxyRequest.process(self)

    def reject_request(self):
        """ Finish the request with a 407 response """
        self.setResponseCode(407)
        self.setHeader(b"Proxy-Authenticate", b"Basic realm: 'mockserver'")
        self.finish()


class AuthProxy(proxy.Proxy):
    requestFactory = AuthProxyRequest


class AuthProxyFactory(http.HTTPFactory):
    """ Factory for :class:`AuthProxy`; ``user`` is the only accepted user name """
    protocol = AuthProxy

    def __init__(self, user):
        http.HTTPFactory.__init__(self)
        self.valid_user = user
whitelist = 8 | .* 9 | 10 | blacklist = 11 | .*\.js$ 12 | .*1\.html$ 13 | -------------------------------------------------------------------------------- /splash/tests/server.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIFIDCCAwigAwIBAgIJAMfvYR3VankyMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV 3 | BAMTCWxvY2FsaG9zdDAeFw0xNzA2MzAwMDQ4MDlaFw0xNzA3MzAwMDQ4MDlaMBQx 4 | EjAQBgNVBAMTCWxvY2FsaG9zdDCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoC 5 | ggIBAOJvat9vpUafwZBFrBHRZUjTrsFTKOPpxvM81ddRjtXRPltgGKmvTs7+oItO 6 | NApRHpRClt5SDIjZBVeTCIxgOuOZ9bKYo4HXANm8tEMuqfdQrQHPlLrd1cEVtPBb 7 | DRU0x614TWRrONJdKjjGPHR+rTA63Dic8cE52WM23QopdhIFy0h2VJn2fwnsvvSJ 8 | YWsHUTd5FK268XdqMMpY3+zDVlcMPzDIXvmKOOdUwpOfXteDeF+L6VbBWZ3dAL+Z 9 | WC4Imm+m3MJ5CkCHQLhio80KXrcPyG1kJUkjyOAnF4EaXV4vj0x1I0LFGLGdCU9i 10 | t9XagSos9BEvq0+vdODvf7jwYW43SI7dahcqnP9BLkrcj7wUGXQd9R1G7EWwlIBl 11 | 1SJjGq8h5WowPa+JsRsSiiGYSxCbSVDkjSlcPlWDJhKnHTw6LdoeT6/MfEcYNCVK 12 | pY6LE0eL2LODE1PDPzFjkG8rsPkzT8HAW1BavLSwUPp1oAopR/CjtL5Aa/FG8P7j 13 | agxXKQ4h4fGLx4aT9xgfqUxieJ2z9qrMYhPQ97N9RYg3VfPAxs/N/UTGDsUoASUN 14 | AtISlphoFi+GkBjCWQYF0hnLSjg6zHpLz1wHkM5e9Le6Xk8e6NA3KXthKG28ys9j 15 | bYY535Qgev+ZLnwLYN3+Ki1Fk52n9WeRneF6t7i8ACow9Ze/AgMBAAGjdTBzMB0G 16 | A1UdDgQWBBRG8QhBo5/wEJDEpEyXKqseTOVqFDBEBgNVHSMEPTA7gBRG8QhBo5/w 17 | EJDEpEyXKqseTOVqFKEYpBYwFDESMBAGA1UEAxMJbG9jYWxob3N0ggkAx+9hHdVq 18 | eTIwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAgEAxXNQ17VsE784L6+g 19 | z472l/rvGFkO777x4rYmca9/4+g7wgmttJiNv5U6ftgTWi7MeuRG/ZSzTxgIKNRF 20 | kwYY7epOgluGglDC5y0fIasCfXBHQPhtt+zabgTMpqSRzgWIuGEBtpHYbQmQPRMB 21 | WsrUM52LdlR/w06opoGwQZYrT8xFv1nrWHmDOzV4+MXal4ZmYYrECZSxGM84YagI 22 | crSMxHFAzp3iXvUIFgDQ2aVUROe9DvNJqXFMtqZCGlZwrizDjuCQ+cNE3Ac7eZGh 23 | BUSlcYtj6w1Lik2PX33FddYbCW4DrNGr4VG4xY/T745HzyWcWpsHusiMlgciSL3s 24 | p70tAN3/PzIBEhLwRuhUl8cmIp/mpFGgUP9RyZ9nMXChB0eSFumCGsYxRYSHqa+o 25 | 02pX32Q6UC+u/WqscNpdH+OFqiwfFjq0JuObp05IGf79ISLY8yj0qG1wHCCT7701 26 | 
zLM8ugcGun9r8HzejVcwBivayRGZW7T/dD3qIKFTJmzNbJMZzsfcY8505FHKL5qz 27 | HhMEgXpF9LK4vRL0zGfwkWCPehyyCjA42GxugEPYKu/wa/su4GZIm1I4IMtgB/d1 28 | gj9ePVj2tTsQQ6PORauzWfdEVziy+v8YNTf+3bN9Lth7/0Js8PdEN8+E9pbVxXDk 29 | OEiRJ1RQD+jpIrCuHp3wbmp+OCs= 30 | -----END CERTIFICATE----- 31 | -----BEGIN RSA PRIVATE KEY----- 32 | MIIJJwIBAAKCAgEA4m9q32+lRp/BkEWsEdFlSNOuwVMo4+nG8zzV11GO1dE+W2AY 33 | qa9Ozv6gi040ClEelEKW3lIMiNkFV5MIjGA645n1spijgdcA2by0Qy6p91CtAc+U 34 | ut3VwRW08FsNFTTHrXhNZGs40l0qOMY8dH6tMDrcOJzxwTnZYzbdCil2EgXLSHZU 35 | mfZ/Cey+9IlhawdRN3kUrbrxd2owyljf7MNWVww/MMhe+Yo451TCk59e14N4X4vp 36 | VsFZnd0Av5lYLgiab6bcwnkKQIdAuGKjzQpetw/IbWQlSSPI4CcXgRpdXi+PTHUj 37 | QsUYsZ0JT2K31dqBKiz0ES+rT6904O9/uPBhbjdIjt1qFyqc/0EuStyPvBQZdB31 38 | HUbsRbCUgGXVImMaryHlajA9r4mxGxKKIZhLEJtJUOSNKVw+VYMmEqcdPDot2h5P 39 | r8x8Rxg0JUqljosTR4vYs4MTU8M/MWOQbyuw+TNPwcBbUFq8tLBQ+nWgCilH8KO0 40 | vkBr8Ubw/uNqDFcpDiHh8YvHhpP3GB+pTGJ4nbP2qsxiE9D3s31FiDdV88DGz839 41 | RMYOxSgBJQ0C0hKWmGgWL4aQGMJZBgXSGctKODrMekvPXAeQzl70t7peTx7o0Dcp 42 | e2EobbzKz2NthjnflCB6/5kufAtg3f4qLUWTnaf1Z5Gd4Xq3uLwAKjD1l78CAwEA 43 | AQKCAgB1rEUz/Ic9noQu/6YeVF14bVmh6h+T4cecXH8cuKur/mOThOsMM6iW9chk 44 | 3K06t1FW5wwUWTteWrnl+uk0WkYAReBjgeNm9F7iyKg9SaDWf7yI9mFqgyI0h51S 45 | U18HKDiks6I0j2wmFnh8rLh6KeLC77PGT/lq2y9X2gkg4M8VKqGr9KKeqHT4qWnL 46 | FyHjRxgz5d6TMlWOI0dbwLgOCK1z609jgJ2qvOQ5g9jicq4M/auuq70FctGpslJT 47 | P9ONiLcJUwPipJhpVGCT8d8CDWOPaspT3vlpewlHQ8l5NERVUA3+Q9mhN2BP3TYb 48 | ZUf4AOejDV2BGO7EKIBq7MvJRlK6w6kMk9RbCPVWJkl3VFA8H9ozzc6NikCGO+uz 49 | 6jrcxx7u9FAQxsmZJ9m1e5fydrx/p3BVs340dAtSEXPwuITsUKd9JPhdgIyHbj5z 50 | JlvdoJDwCrUPLFjSEjtyPt1mS3b8cxaUejNX6OASWcXwF7nnEGJaS6M++ib0EYYU 51 | gZHAjzoILrdPKomGhBgaNG7LVoCAcvNvDZAiHgc9gFrq26SS1s+Yv8pn9+Z8XbyL 52 | WBQKqKX3AzGvHMOOF1jCDSPsraUBkJOR+NMb4VvoHeADyarvuLEME6Uy5p907Bkx 53 | 82TGg/vW/r7AueQQzMzs1MkgC1OMhZp6r1heUhIW+b7SZq5OwQKCAQEA/qo+GfEE 54 | t4CT00e6J8qr34N/+FIlsee77+y7Ijm7QKlO5ZeUOlzI87uYfi1fMN1zRwtmoBP5 55 | /G7qP3UdzoUBdYMvW7RtyjYSqLKEsg9WP534uBsyf8jBDNO0A0Hg1oEAboLh4u9I 56 | 
3kXSQM4HF0OwGu30cNpE3LrgE0cY4Pc6VgpvUwqkj3y/tZrYRd0JE9o0An8qSSSy 57 | 3Lbhvnr3LSPQH+lKftYLyvT/DVKEBULaf8Cp9uki8pnk5NSHp4Zv3/vfXpxwjgkK 58 | O8+/V/+Cak+4fOvqsdouPX3/f8rP4x6YDfGzEHG2GFAvGgA0m1XT4V9tMgR8ekUm 59 | mBEd60RDzt3CkQKCAQEA459KdUNpgw705UB/Ik4G/UqQnL3lfbsY7Np2s4IYLi+a 60 | 7kYw1GG8SVvFUSsW3symwV4nF899riKBrE6bqnIJ/R4jbZUe6iF2wZLI5NesqdE3 61 | WROGUvPA20UTjiklDIW8lyMEkfEWSs9YdlWckc3oDqvtB73gXwgq8zces8sBBVMP 62 | fbX1QMcu+elZPN/DPqee/W4qly4MGwsrYEohBf2nm/nr9djyVnBZESEVYeS3uPSC 63 | 7l32/KmtYwF751aURk3jFArbXGlk5koPXAtuCQjpE9fov2dGb5SizDCs1E7qf2+T 64 | +67pYFxVYpCAAHuWSZVKLH8ZlxNautcA87e8EYo9TwKCAQBk8SQGclNVTSDLGV4/ 65 | UYVI5j2+Nwl7+Ae1dRdD3LBNVA7S0H9PluGMQqqSMPfPTafY3DsEboajOW+eUZTL 66 | /Ep0wtftuJngEgjZV1FjptKgCsTAbP9aST/EDtIGjIFLZPy/0R8qLYCroe9SApgq 67 | 74iJVBKU/N9YwrX4zjCdU0GLQz2XI9vQiDkRCdx57MAhX1dIOhGPxn0jIVZfgywg 68 | d1aSn40Is10fSHckV7W+xB4Jj1uPZB+XG8rvrRJMI+yKUrLaNJYkpo6o4JLOlZei 69 | /hB5a6Wuc3Xqp9uzUreOJd1NgaVhekEcaAQTgUtY2u9OIN4OelrAuFIw6m/6hid+ 70 | gsYRAoIBABT7ROjEOwCaq1ZKeK2X1asZaXUZSAMr8OM8S/7F+iafur9F9xBMpgd5 71 | DxopR6Vn5eMjI6VdOe+IPL2+TTkL5AHR+mAKg9v/E6xKNHxrqP3KTDjIxPP2hgtw 72 | NZpADqs11Kl1z+EIAQ8KR9Q4lgMWoNJhZKr3BKyd2x+qMAlQqBpuipoC4MWE1r6w 73 | lMo4sg2fc9dWhVSEKReBExGAI8UimYWWJvr1cXkoR/19ODRa2diRqVQBk1e86D+d 74 | DmfHoklMuqbJTWkaSfO0VfAdGZp5mMp8d3bVNTfp0ovR3MyBLbprdY+YkuiP+kro 75 | mPPycGr9+OVbHefAH/fjA7JzeEGBpZ8CggEALVfF75/jczv7ygw8KSoDbLkpoFfe 76 | TwMHsGCdenuiRyEA0OLOkzubGlMsmSZt/rT01CJ4sRQY2pgyKqBu/+sVc+0r4siA 77 | P9hUTLqkVwqbLMXbDu5Qj4TYLxuIg8y66jn+EpHvy6Q+eNYnopSF9caiNIGZDWB9 78 | zbN5Hys9HIwnmvdOmWPltqnlDtvqxXaD3um/pyMO3Xy5TkSL2VfnWBDRdtmMQBhA 79 | UdW4blaw2zxRN6EkVnEKOYAnCObqSdc7v82ON5dy3Hfbppvj0vA9I6tR0Zi1DmAa 80 | YmOZ3ap80+yFvoTF4szxaYtTSRtNgvh4eHA+C3dDMq4gKfdD4LpA1flxlA== 81 | -----END RSA PRIVATE KEY----- 82 | -------------------------------------------------------------------------------- /splash/tests/test_argument_caching.py: -------------------------------------------------------------------------------- 1 | # -*- 
class RenderHtmlArgumentCachingTest(BaseRenderTest):
    """ save_args / load_args round trip for the render.html endpoint """
    endpoint = 'render.html'

    def test_cache_url(self):
        # first request stores "url" and "wait" in the argument cache
        saving_query = {
            "url": self.mockurl('jsrender'),
            "wait": 0.5,
            "save_args": "url,wait",
        }
        saved = self.request(saving_query)
        self.assertStatusCode(saved, 200)
        self.assertIn("After", saved.text)

        # second request only sends the hashes returned by the first one
        saved_args_header = saved.headers['X-Splash-Saved-Arguments']
        loaded = self.request({"load_args": saved_args_header})
        self.assertStatusCode(loaded, 200)
        assert loaded.text == saved.text

        # drop everything which is cached
        gc_resp = self.post({}, endpoint="_gc")
        self.assertStatusCode(gc_resp, 200)
        gc_info = gc_resp.json()
        assert gc_info['cached_args_removed'] >= 2
        assert gc_info['pyobjects_collected'] > 0
        assert gc_info['status'] == 'ok'

        # the same hashes must be reported as expired now
        expired = self.request({"load_args": saved_args_header})
        error = self.assertJsonError(expired, 498, 'ExpiredArguments')
        assert set(error['info']['expired']) == {'wait', 'url'}


class ArgumentCachingTest(BaseLuaRenderTest):
    """ save_args / load_args passed as JSON values to the execute endpoint """
    request_handler = JsonPostRequestHandler

    def test_cache_args(self):
        script = """
        function main(splash)
            return {foo=splash.args.foo, baz=splash.args.baz}
        end
        """
        saved = self.request_lua(script, {
            "save_args": ["lua_source", "foo", "bar"],
            "foo": "hello",
            "baz": "world",
        })
        self.assertStatusCode(saved, 200)
        self.assertEqual(saved.json(), {"foo": "hello", "baz": "world"})

        # the header looks like "name=hash;name=hash"
        header_items = saved.headers['X-Splash-Saved-Arguments'].split(";")
        hashes = {
            name: digest
            for name, digest in (item.split("=", 1) for item in header_items)
        }
        loaded = self.request({"load_args": hashes, "baz": "!"})
        self.assertStatusCode(loaded, 200)
        self.assertEqual(loaded.json(), {"foo": "hello", "baz": "!"})

        # a hash which is not in the cache is reported as expired
        hashes["foo"] = hashlib.sha1(b"invalid").hexdigest()
        expired = self.request({"load_args": hashes, "baz": "!"})
        error = self.assertJsonError(expired, 498, "ExpiredArguments")
        self.assertEqual(error['info'], {'expired': ['foo']})

    def test_bad_save_args(self):
        script = "function main(splash) return 'hi' end"

        # a dict is not accepted
        as_dict = self.request_lua(script, {
            "save_args": {"lua_source": "yes"},
        })
        self.assertBadArgument(as_dict, "save_args")

        # a list with a non-string item is not accepted
        with_number = self.request_lua(script, {
            "save_args": ["foo", 324],
        })
        self.assertBadArgument(with_number, "save_args")

    def test_bad_load_args(self):
        as_string = self.request({"load_args": "foo"})
        self.assertBadArgument(as_string, "load_args")

        as_pairs = self.request({"load_args": [("foo", "bar")]})
        self.assertBadArgument(as_pairs, "load_args")
18 | self._callback_count = 0 19 | self._errback_count = 0 20 | self._raise_count = 0 21 | 22 | def _make_proxy(self): 23 | def callback(val): 24 | self._callback_count += 1 25 | 26 | def errback(message, raise_): 27 | self._errback_count += 1 28 | 29 | if raise_: 30 | raise Exception() 31 | 32 | class Logger(SplashLogger): 33 | def __init__(self, uid, verbosity): 34 | self.messages = [] 35 | super().__init__(uid, verbosity) 36 | 37 | def log(self, message, min_level=None): 38 | self.messages.append((message, min_level)) 39 | super().log(message, min_level) 40 | 41 | logger = Logger(uid=0, verbosity=2) 42 | return OneShotCallbackProxy(None, callback, errback, logger, timeout=0) 43 | 44 | def _assertLastMessageWarns(self, cb_proxy: OneShotCallbackProxy): 45 | assert cb_proxy.logger.messages[-1][1] == 1 46 | 47 | def test_can_resume_once(self): 48 | cb_proxy = self._make_proxy() 49 | cb_proxy.resume('ok') 50 | self.assertEqual(self._callback_count, 1) 51 | self.assertEqual(self._errback_count, 0) 52 | 53 | def test_can_error_once(self): 54 | cb_proxy = self._make_proxy() 55 | cb_proxy.error('not ok') 56 | self.assertEqual(self._callback_count, 0) 57 | self.assertEqual(self._errback_count, 1) 58 | 59 | def test_can_error_with_raise(self): 60 | cb_proxy = self._make_proxy() 61 | 62 | with self.assertRaises(Exception): 63 | cb_proxy.error('not ok', raise_=True) 64 | 65 | self.assertEqual(self._callback_count, 0) 66 | self.assertEqual(self._errback_count, 1) 67 | 68 | def test_cannot_resume_twice(self): 69 | cb_proxy = self._make_proxy() 70 | cb_proxy.resume('ok') 71 | 72 | cb_proxy.resume('still ok?') 73 | self._assertLastMessageWarns(cb_proxy) 74 | 75 | def test_cannot_resume_and_error(self): 76 | cb_proxy = self._make_proxy() 77 | cb_proxy.resume('ok') 78 | cb_proxy.error('still ok?') 79 | self._assertLastMessageWarns(cb_proxy) 80 | 81 | def test_cannot_resume_after_cancel(self): 82 | cb_proxy = self._make_proxy() 83 | cb_proxy.cancel('changed my mind') 84 | 
@pytest.mark.usefixtures("class_splash_unrestricted")
class StopProcessingTest(unittest.TestCase):
    """
    These tests check that script is stopped after connection
    is closed.

    We can't use splash:http_get or XmlHTTPRequest because
    they don't work when page is stopped, and there is no other
    way to communicate with outside world in sandboxed Splash,
    so a non-sandboxed version is started.
    """
    CREATE_FILE = """
    function create_file(filename, contents)
        fp = io.open(filename, "w")
        fp:write(contents)
        fp:close()
        print("file created")
    end
    """

    def get_random_filename(self):
        """ Return a random file path inside the Splash server's tempdir """
        tempdir = self.splash_unrestricted.tempdir
        return os.path.join(tempdir, str(random.random()))

    def open_http_connection(self, code, query=None, method='GET'):
        """
        Send a request to non-sandboxed Splash, return an HTTPConnection.
        create_file Lua function is pre-loaded.

        XXX: why can't we use requests or urllib, why
        don't they close a connection after a timeout error?
        """
        q = {"lua_source": self.CREATE_FILE + "\n" + code}
        q.update(query or {})
        conn = HTTPConnection('localhost', self.splash_unrestricted.portnum)
        conn.request(method, "/execute/?" + urlencode(q))
        return conn

    def assertScriptStopped(self, script, min_time=0.1, max_time=1.5):
        """
        Check that script is stopped after the client disconnects.

        The script receives a 'filename' argument and is expected to create
        a file with this name later than ``min_time`` seconds after the
        request is sent. The connection is closed after ``min_time``; the
        file must not exist then, and must still not exist
        ``max_time + min_time`` seconds later.
        """
        filename = self.get_random_filename()
        conn = self.open_http_connection(script, {'filename': filename})
        try:
            time.sleep(min_time)
            assert not os.path.exists(filename)  # not yet created
        finally:
            # close the connection even if the check above fails, so that
            # a failing test doesn't leave an open connection behind
            conn.close()

        time.sleep(max_time + min_time)
        assert not os.path.exists(filename)  # script is aborted

    def test_wait_timer_stopped_after_request_finished(self):
        self.assertScriptStopped("""
        function main(splash)
            splash:wait(0.7)
            create_file(splash.args.filename, "not empty")
            return "ok"
        end
        """)

    def test_call_later_stopped(self):
        self.assertScriptStopped("""
        function main(splash)
            splash:call_later(function()
                create_file(splash.args.filename, "not empty")
            end, 0.7)
            splash:wait(1.0)
            return "ok"
        end
        """)

    def test_js_timer(self):
        self.assertScriptStopped("""
        function main(splash)
            splash:wait_for_resume([[
                function main(splash){
                    setTimeout(function () {
                        splash.resume();
                    }, 700);
                }
            ]])
            create_file(splash.args.filename, "not empty")
            return "ok"
        end""")

    def test_js_timer_aborted(self):
        self.assertScriptStopped("""
        function main(splash)
            splash:wait_for_resume([[
                function main(splash){
                    setTimeout(function () {
                        splash.resume();
                    }, 10000);
                }
            ]], 0.7)
            create_file(splash.args.filename, "not empty")
            return "ok"
        end""")
class Base:
    # a hack to skip test running from a mixin
    class EmulationMixin(test_render.BaseRenderTest):
        """
        Mixin which runs the tests of a render.xxx test case against
        the ``execute`` endpoint, using the Lua script in ``self.script``.
        """
        endpoint = 'execute'

        def request(self, query, endpoint=None, headers=None, **kwargs):
            # Work on a copy: the old ``{} or query`` always evaluated to
            # ``query`` itself, so the caller's dict got 'lua_source' added
            # to it (and a missing query crashed on ``.update``).
            query = dict(query or {})
            query['lua_source'] = self.script
            return self._get_handler().request(query, endpoint, headers, **kwargs)

        def post(self, query, endpoint=None, payload=None, headers=None, **kwargs):
            raise NotImplementedError()

        # ==== overridden tests =============================
        @unittest.skipIf(NON_EXISTING_RESOLVABLE, "non existing hosts are resolvable")
        def test_render_error(self):
            r = self.request({"url": "http://non-existent-host/"})
            self.assertJsonError(r, 400)

        @pytest.mark.xfail(
            run=False,
            reason="wait time validation is not implemented in emulation scripts"
        )
        def test_invalid_wait(self):
            super().test_invalid_wait()

        def test_self(self):
            # make sure mixin order is correct
            assert self.endpoint == 'execute'


class EmulatedRenderHtmlTest(Base.EmulationMixin, test_render.RenderHtmlTest):
    script = 'main = require("emulation").render_html'


class EmulatedHttpRedirectTest(Base.EmulationMixin, test_redirects.HttpRedirectTest):
    script = 'main = require("emulation").render_html'


class EmulatedJsRedirectTest(Base.EmulationMixin, test_redirects.JsRedirectTest):
    script = 'main = require("emulation").render_html'

    # Overridden to return 400.
    @unittest.skipIf(NON_EXISTING_RESOLVABLE, "non existing hosts are resolvable")
    def test_redirect_to_non_existing(self):
        r = self.request({
            "url": self.mockurl("jsredirect-non-existing"),
            "wait": 2.,
        })
        self.assertJsonError(r, 400)


class EmulatedMetaRedirectTest(Base.EmulationMixin, test_redirects.MetaRedirectTest):
    script = 'main = require("emulation").render_html'


class EmulatedRenderPngTest(Base.EmulationMixin, test_render.RenderPngTest):
    script = 'main = require("emulation").render_png'

    @pytest.mark.xfail(
        run=False,
        reason="""
Range validation in lua renderer is not implemented and out of range values of
width/height will consume huge amount of memory either bringing down the test
server because of OOM killer or grinding user system to a halt because of swap.
""")
    def test_range_checks(self):
        super().test_range_checks()

    def test_extra_height_doesnt_leave_garbage_when_using_tiled_render(self):
        # XXX: this function belongs to test_render, BUT height < 1000 is fixed
        # in defaults and so is tile max size, so in order to force rendering
        # that may produce extra pixels at the bottom we go the way that avoids
        # parameter validation.
        r = self.request({'url': self.mockurl('tall'), 'viewport': '100x100',
                          'height': 3000})
        png = self.assertPng(r, height=3000)
        # Ensure that the extra pixels at the bottom are transparent.
        alpha_channel = png.crop((0, 100, 100, 3000)).getdata(3)
        self.assertEqual(alpha_channel.size, (100, 2900))
        self.assertEqual(alpha_channel.getextrema(), (0, 0))


class EmulatedRenderJpegTest(Base.EmulationMixin, test_render.RenderJpegTest):
    script = 'main = require("emulation").render_jpeg'

    @pytest.mark.xfail(
        run=False,
        reason="""
Range validation in lua renderer is not implemented and out of range values of
width/height will consume huge amount of memory either bringing down the test
server because of OOM killer or grinding user system to a halt because of swap.
""")
    def test_range_checks(self):
        super().test_range_checks()

    def test_extra_height_doesnt_leave_garbage_when_using_tiled_render(self):
        # XXX: this function belongs to test_render, BUT height < 1000 is fixed
        # in defaults and so is tile max size, so in order to force rendering
        # that may produce extra pixels at the bottom we go the way that avoids
        # parameter validation.
        r = self.request({'url': self.mockurl('tall'), 'viewport': '100x100',
                          'height': 3000})
        img = self.assertJpeg(r, height=3000)
        # Ensure that the extra pixels at the bottom have the value 255
        # in each of the three channels checked below.
        box = img.crop((0, 100, 100, 3000))
        self.assertEqual(box.size, (100, 2900))
        # iterate over channels
        for i in range(3):
            self.assertEqual(box.getdata(i).getextrema(), (255, 255))


class EmulatedRenderHarTest(Base.EmulationMixin, test_har.HarRenderTest):
    script = 'main = require("emulation").render_har'
def get_har_response(text, encoding):
    """Build a minimal HAR ``response`` dict whose body is ``text``.

    ``content.size`` is ``len(text)``. The ``content.encoding`` key is only
    present when ``encoding`` is not None.
    """
    content = {
        "size": len(text),
        "compression": 0,
        "mimeType": "text/html; charset=utf-8",
        "text": text,
    }
    # HAR marks the body encoding as optional, so leave the key out entirely
    # rather than storing None.
    if encoding is not None:
        content['encoding'] = encoding

    return {
        "status": 200,
        "statusText": "OK",
        "httpVersion": "HTTP/1.1",
        "cookies": [],
        "headers": [],
        "content": content,
        "redirectURL": "",
        "headersSize": -1,
        "bodySize": -1,
    }
def test_qsize_to_tuple():
    """qsize_to_tuple returns the two components of QSize and QSizeF objects
    as a plain tuple."""
    int_size = QSize(2, 3)
    float_size = QSizeF(2.0, 3.0)

    int_result = qsize_to_tuple(int_size)
    float_result = qsize_to_tuple(float_size)

    assert int_result == (2, 3)
    assert float_result == (2.0, 3.0)
import BaseLuaRenderTest 4 | 5 | 6 | class RequestBodyLuaTest(BaseLuaRenderTest): 7 | def test_request_body_enabled(self): 8 | url = self.mockurl('jspost') 9 | resp = self.request_lua(""" 10 | treat = require('treat') 11 | function main(splash) 12 | splash.request_body_enabled = true 13 | assert(splash:go(splash.args.url)) 14 | splash:wait(0.1) 15 | local har1 = splash:har{reset=true} 16 | local enabled1 = splash.request_body_enabled 17 | splash.request_body_enabled = false 18 | assert(splash:go(splash.args.url)) 19 | splash:wait(0.1) 20 | local har2 = splash:har() 21 | local enabled2 = splash.request_body_enabled 22 | return { 23 | har = treat.as_array({har1, har2}), 24 | enabled1 = enabled1, 25 | enabled2 = enabled2 26 | } 27 | end 28 | """, {'url': url}) 29 | self.assertStatusCode(resp, 200) 30 | data = resp.json() 31 | 32 | assert data['enabled1'] 33 | assert not data['enabled2'] 34 | 35 | har1 = data['har'][0]['log']['entries'] 36 | assert 'postData' in har1[1]['request'] 37 | 38 | har2 = data['har'][1]['log']['entries'] 39 | assert 'postData' not in har2[1]['request'] 40 | 41 | def test_request_info_on_request_postdata(self): 42 | url = self.mockurl('jspost') 43 | resp = self.request_lua(""" 44 | function main(splash) 45 | splash.request_body_enabled = true 46 | 47 | local request_info = nil 48 | 49 | splash:on_request(function(request) 50 | if request.method == "POST" then 51 | request_info = request.info 52 | end 53 | end) 54 | 55 | assert(splash:go(splash.args.url)) 56 | splash:wait(0.1) 57 | 58 | local post_data = request_info["postData"] 59 | return { 60 | text = post_data["text"], 61 | mime_type = post_data["mimeType"] 62 | } 63 | end 64 | """, {'url': url}) 65 | self.assertStatusCode(resp, 200) 66 | data = resp.json() 67 | 68 | assert data['text'] == "hidden-field=i-am-hidden&a-field=field+value" 69 | assert data['mime_type'] == "application/x-www-form-urlencoded" 70 | 71 | def test_request_info_on_response_postdata(self): 72 | url = 
class ContentTypeTest(BaseRenderTest):
    """Checks the ``allowed_content_types`` / ``forbidden_content_types``
    request options against the "subresources/" mock page."""
    endpoint = 'render.json'

    def _request(self, allowed_ctypes=None, forbidden_ctypes=None):
        """Render the "subresources/" page and report which resources loaded.

        The content type options are only sent when they are not None.
        Returns the decoded JSON produced by the injected script, i.e. a dict
        with the ``imageLoaded`` and ``styleLoaded`` flags.
        """
        js_source = """
        JSON.stringify({
            imageLoaded: window.imageLoaded,
            styleLoaded: getComputedStyle(document.body).backgroundColor == 'rgb(255, 0, 0)'
        });
        """
        query = {
            'url': self.mockurl("subresources/"),
            'script': 1,
            'console': 1,
            'js_source': js_source,
        }
        optional = (
            ('allowed_content_types', allowed_ctypes),
            ('forbidden_content_types', forbidden_ctypes),
        )
        for option, value in optional:
            if value is not None:
                query[option] = value

        # The same options travel both as query arguments and as a JSON body.
        reply = self.post(
            query,
            endpoint=self.endpoint,
            payload=json.dumps(query),
            headers={'content-type': 'application/json'},
        )
        script_result = reply.json()['script']
        return json.loads(script_result)

    def test_disable(self):
        loaded = self._request()
        self.assertEqual(loaded, {u'styleLoaded': True, u'imageLoaded': True})

    def test_block_css(self):
        loaded = self._request(forbidden_ctypes='text/css')
        self.assertEqual(loaded, {u'styleLoaded': False, u'imageLoaded': True})

    def test_block_images(self):
        loaded = self._request(forbidden_ctypes='image/*')
        self.assertEqual(loaded, {u'styleLoaded': True, u'imageLoaded': False})

    def test_block_both(self):
        loaded = self._request(forbidden_ctypes='image/*,text/css')
        self.assertEqual(loaded, {u'styleLoaded': False, u'imageLoaded': False})

    def test_allow_images(self):
        loaded = self._request(allowed_ctypes='image/*,text/html')
        self.assertEqual(loaded, {u'styleLoaded': False, u'imageLoaded': True})

    @pytest.mark.xfail(
        run=False,
        reason="https://github.com/scrapinghub/splash/issues/247")
    def test_dont_block_invalid(self):
        target = self.mockurl("bad-content-type")
        resp = self.request({"url": target})
        self.assertEqual(resp.text, "ok")
test_enable_response_body(self): 24 | url = self.mockurl('show-image') 25 | resp = self.request_lua(""" 26 | function main(splash) 27 | splash:on_request(function(req) 28 | if req.url:find(".gif") ~= nil then 29 | req:enable_response_body() 30 | end 31 | end) 32 | 33 | local bodies = {} 34 | splash:on_response(function(resp, req) 35 | bodies[resp.url] = resp.body 36 | end) 37 | 38 | assert(splash:go(splash.args.url)) 39 | return {har=splash:har(), bodies=bodies} 40 | end 41 | """, {'url': url}) 42 | self.assertStatusCode(resp, 200) 43 | data = resp.json() 44 | 45 | bodies = data['bodies'] 46 | assert len(bodies) == 1 47 | url = list(bodies.keys())[0] 48 | assert "slow.gif" in url 49 | img_gif = requests.get(self.mockurl("slow.gif?n=0")).content 50 | body = base64.b64decode(bodies[url]) 51 | assert body == img_gif 52 | 53 | entries = self.assertHarEntriesLength(data['har'], 2) 54 | self.assertNoContent(entries[0]) 55 | self.assertBase64Content(entries[1], img_gif) 56 | 57 | def test_response_body_enabled(self): 58 | url = self.mockurl('show-image') 59 | resp = self.request_lua(""" 60 | treat = require('treat') 61 | function main(splash) 62 | splash.response_body_enabled = true 63 | assert(splash:go(splash.args.url)) 64 | local har1 = splash:har{reset=true} 65 | splash.response_body_enabled = false 66 | assert(splash:go(splash.args.url)) 67 | local har2 = splash:har() 68 | return { 69 | har = treat.as_array({har1, har2}), 70 | enabled2 = splash.response_body_enabled, 71 | } 72 | end 73 | """, {'url': url}) 74 | self.assertStatusCode(resp, 200) 75 | data = resp.json() 76 | assert data['enabled2'] is False 77 | 78 | img_gif = requests.get(self.mockurl("slow.gif?n=0")).content 79 | resp = requests.get(self.mockurl('show-image')).content 80 | 81 | entries = self.assertHarEntriesLength(data['har'][0], 2) 82 | body = get_response_body_bytes(entries[0]['response']) 83 | assert body[:50] == resp[:50] # there is some randomness in the end 84 | 
class RunJsTest(BaseRenderTest):
    """Tests for executing JavaScript that is POSTed as the request body
    together with ``script=1`` (see ``_runjs_request``)."""
    endpoint = 'render.json'

    def test_simple_js(self):
        """The value of the final JS expression comes back under 'script'."""
        js_source = "function test(x){ return x; } test('abc');"
        r = self._runjs_request(js_source).json()
        self.assertEqual(r['script'], "abc")

    def test_js_and_console(self):
        """With ``console=1`` the console.log messages are returned too."""
        js_source = """function test(x){ return x; }
        console.log('some log');
        console.log('another log');
        test('abc');"""
        params = {'console': '1'}
        r = self._runjs_request(js_source, params=params).json()
        self.assertEqual(r['script'], "abc")
        self.assertEqual(r['console'], ["some log", "another log"])

    def test_js_modify_html(self):
        """DOM changes made by the script are visible in render.html output."""
        js_source = """function test(x){ document.getElementById("p1").innerHTML=x; }
        test('Changed');"""
        params = {'url': self.mockurl("jsrender")}
        r = self._runjs_request(js_source, endpoint='render.html', params=params)
        # assertIn/assertNotIn report the searched text on failure, unlike
        # assertTrue on a bare membership expression.
        self.assertNotIn("Before", r.text)
        self.assertIn("Changed", r.text)

    def test_js_profile(self):
        """``test`` is callable when the ``js=test`` profile is requested."""
        js_source = """test('abc');"""
        params = {'url': self.mockurl("jsrender"), 'js': 'test'}
        r = self._runjs_request(js_source, params=params).json()
        self.assertEqual(r['script'], "abc")

    def test_js_profile_another_lib(self):
        """``test2`` is callable as well when ``js=test`` is requested."""
        js_source = """test2('abc');"""
        params = {'url': self.mockurl("jsrender"), 'js': 'test'}
        r = self._runjs_request(js_source, params=params).json()
        self.assertEqual(r['script'], "abcabc")

    def test_js_utf8_lib(self):
        """Non-ASCII output of a profile function survives in both the script
        result and the console log."""
        js_source = """console.log(test_utf8('abc')); test_utf8('abc');"""
        params = {'url': self.mockurl("jsrender"), 'js': 'test', 'console': '1'}
        r = self._runjs_request(js_source, params=params).json()
        self.assertEqual(r['script'], u'abc\xae')
        self.assertEqual(r['console'], [u'abc\xae'])

    def test_js_nonexisting(self):
        """A ``js`` value of '../../filters' is rejected as a BadOption."""
        resp = self._runjs_request("console.log('hello');", params={
            'url': self.mockurl('jsrender'),
            'js': '../../filters'
        })
        data = self.assertJsonError(resp, 400, "BadOption")
        self.assertEqual(data['info']['argument'], 'js')
        self.assertIn("does not exist", data['info']['description'])

    def test_js_external_iframe(self):
        # by default, cross-domain access is disabled, so this does nothing
        params = {'url': self.mockurl("externaliframe")}
        r = self._runjs_request(CROSS_DOMAIN_JS, params=params).json()
        self.assertNotIn('script', r)

    def test_js_incorrect_content_type(self):
        """A body sent as text/plain is answered with HTTP 415."""
        js_source = "function test(x){ return x; } test('abc');"
        headers = {'content-type': 'text/plain'}
        r = self._runjs_request(js_source, headers=headers)
        self.assertStatusCode(r, 415)

    def test_proper_viewport(self):
        """The ``viewport`` option sets window.innerWidth/innerHeight."""
        js_source = """
            function size() {
                return [window.innerWidth, window.innerHeight].toString();
            }
            size();
            """
        params = {'viewport': '123x234'}
        r = self._runjs_request(js_source, params=params).json()
        self.assertEqual(r['script'], '123,234')

    def test_js_invalid_profile(self):
        """An unknown ``js`` profile name is answered with HTTP 400."""
        js_source = """test('abc');"""
        params = {'url': self.mockurl("jsrender"), 'js': 'not_a_profile'}
        r = self._runjs_request(js_source, params=params)
        self.assertStatusCode(r, 400)

    def _runjs_request(self, js_source, endpoint=None, params=None, headers=None):
        """POST ``js_source`` as the request body and return the response.

        The query defaults to the "jsrender" mock page with ``script=1`` and
        the content type defaults to application/javascript; entries in
        ``params`` and ``headers`` override those defaults.
        """
        query = {'url': self.mockurl("jsrender"), 'script': 1}
        query.update(params or {})
        req_headers = {'content-type': 'application/javascript'}
        req_headers.update(headers or {})
        return self.post(query, endpoint=endpoint,
                         payload=js_source, headers=req_headers)
class UITest(BaseRenderTest):
    """Smoke test for the page served at the root ("") endpoint."""

    def test_render_ui_available(self):
        page = self.request({}, endpoint="")
        self.assertStatusCode(page, 200)
        self.assertIn("Splash", page.text)

        # The page must advertise the same Lua availability that the
        # ``lua`` module reports.
        expected_flag = (
            '"lua_enabled": true'
            if lua.is_supported()
            else '"lua_enabled": false'
        )
        self.assertIn(expected_flag, page.text)
def test_swap_byte_order_i32():
    """swap_byte_order_i32 reverses the bytes inside each 4-byte group and
    raises ValueError for the 6- and 3-byte inputs."""
    swapped_cases = (
        (b"", b""),
        (b"abcd", b"dcba"),
        (b"abcdefgh", b"dcbahgfe"),
    )
    for data, expected in swapped_cases:
        assert swap_byte_order_i32(data) == expected

    rejected_inputs = (b"abcdef", b"abc")
    for data in rejected_inputs:
        with pytest.raises(ValueError):
            swap_byte_order_i32(data)
/* styles for custom events */
.netPageTimingBar {opacity: 0.3; width: 2px; }
.timeInfoTip { width: 250px !important; }
.customEventBar { background-color: gray; }
/* NOTE(review): "marine" is not a CSS named color, so browsers discard this
   declaration and the rule sets no background at all. Confirm the intended
   color (perhaps "aquamarine" or "navy") before changing it. */
._onStarted { background-color: marine; }
._onPrepareStart { background-color: green; }
._onCustomJsExecuted { background-color: green; }
._onScreenshotPrepared { background-color: magenta; }
._onPngRendered { background-color: magenta; }
._onIframesRendered { background-color: black; }
#666; 119 | } 120 | 121 | #result .action { 122 | color: #337AB7; 123 | text-decoration: underline; 124 | margin-left: 10px; 125 | cursor: pointer; 126 | } 127 | .demo-link { 128 | font-size: 75%; 129 | vertical-align: 10%; 130 | } 131 | 132 | .demo-link::before { 133 | content: "["; 134 | color: #808080; 135 | } 136 | .demo-link::after{ 137 | content: "]"; 138 | color: #808080; 139 | } 140 | 141 | .examples-dropdown { 142 | display: inline-block; 143 | } 144 | -------------------------------------------------------------------------------- /splash/xvfb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Module for starting Xvfb automatically if it is available. 4 | Uses xvfbwrapper Python package. 5 | """ 6 | import sys 7 | from contextlib import contextmanager 8 | from splash import defaults 9 | from twisted.python import log 10 | 11 | 12 | def autostart(disable=False, screen_size=None): 13 | if disable: 14 | return _dummy() 15 | return _get_xvfb(screen_size=screen_size) or _dummy() 16 | 17 | 18 | def log_options(xvfb): 19 | if not hasattr(xvfb, 'xvfb_cmd'): # dummy 20 | log.msg("Xvfb is not started automatically") 21 | else: 22 | log.msg("Xvfb is started: %s" % xvfb.xvfb_cmd) 23 | 24 | 25 | @contextmanager 26 | def _dummy(): 27 | yield 28 | 29 | 30 | def _get_xvfb(screen_size=None): 31 | if not sys.platform.startswith('linux'): 32 | return None 33 | 34 | try: 35 | from xvfbwrapper import Xvfb 36 | screen_size = screen_size or defaults.VIEWPORT_SIZE 37 | width, height = map(int, screen_size.split("x")) 38 | return Xvfb(width, height, nolisten="tcp") 39 | except ImportError: 40 | return None 41 | --------------------------------------------------------------------------------