├── LICENSE ├── NOTICE ├── README.md ├── assets └── screenshot1.png ├── collections └── .gitignore ├── config.yaml ├── docker-compose.yml ├── rbsapp.py └── uwsgi.ini /LICENSE: -------------------------------------------------------------------------------- 1 | Version 2.0, January 2004 2 | http://www.apache.org/licenses/ 3 | 4 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 5 | 6 | 1. Definitions. 7 | 8 | "License" shall mean the terms and conditions for use, reproduction, 9 | and distribution as defined by Sections 1 through 9 of this document. 10 | 11 | "Licensor" shall mean the copyright owner or entity authorized by 12 | the copyright owner that is granting the License. 13 | 14 | "Legal Entity" shall mean the union of the acting entity and all 15 | other entities that control, are controlled by, or are under common 16 | control with that entity. For the purposes of this definition, 17 | "control" means (i) the power, direct or indirect, to cause the 18 | direction or management of such entity, whether by contract or 19 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 20 | outstanding shares, or (iii) beneficial ownership of such entity. 21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity 23 | exercising permissions granted by this License. 24 | 25 | "Source" form shall mean the preferred form for making modifications, 26 | including but not limited to software source code, documentation 27 | source, and configuration files. 28 | 29 | "Object" form shall mean any form resulting from mechanical 30 | transformation or translation of a Source form, including but 31 | not limited to compiled object code, generated documentation, 32 | and conversions to other media types. 
33 | 34 | "Work" shall mean the work of authorship, whether in Source or 35 | Object form, made available under the License, as indicated by a 36 | copyright notice that is included in or attached to the work 37 | (an example is provided in the Appendix below). 38 | 39 | "Derivative Works" shall mean any work, whether in Source or Object 40 | form, that is based on (or derived from) the Work and for which the 41 | editorial revisions, annotations, elaborations, or other modifications 42 | represent, as a whole, an original work of authorship. For the purposes 43 | of this License, Derivative Works shall not include works that remain 44 | separable from, or merely link (or bind by name) to the interfaces of, 45 | the Work and Derivative Works thereof. 46 | 47 | "Contribution" shall mean any work of authorship, including 48 | the original version of the Work and any modifications or additions 49 | to that Work or Derivative Works thereof, that is intentionally 50 | submitted to Licensor for inclusion in the Work by the copyright owner 51 | or by an individual or Legal Entity authorized to submit on behalf of 52 | the copyright owner. For the purposes of this definition, "submitted" 53 | means any form of electronic, verbal, or written communication sent 54 | to the Licensor or its representatives, including but not limited to 55 | communication on electronic mailing lists, source code control systems, 56 | and issue tracking systems that are managed by, or on behalf of, the 57 | Licensor for the purpose of discussing and improving the Work, but 58 | excluding communication that is conspicuously marked or otherwise 59 | designated in writing by the copyright owner as "Not a Contribution." 60 | 61 | "Contributor" shall mean Licensor and any individual or Legal Entity 62 | on behalf of whom a Contribution has been received by Licensor and 63 | subsequently incorporated within the Work. 64 | 65 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 66 | this License, each Contributor hereby grants to You a perpetual, 67 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 68 | copyright license to reproduce, prepare Derivative Works of, 69 | publicly display, publicly perform, sublicense, and distribute the 70 | Work and such Derivative Works in Source or Object form. 71 | 72 | 3. Grant of Patent License. Subject to the terms and conditions of 73 | this License, each Contributor hereby grants to You a perpetual, 74 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 75 | (except as stated in this section) patent license to make, have made, 76 | use, offer to sell, sell, import, and otherwise transfer the Work, 77 | where such license applies only to those patent claims licensable 78 | by such Contributor that are necessarily infringed by their 79 | Contribution(s) alone or by combination of their Contribution(s) 80 | with the Work to which such Contribution(s) was submitted. If You 81 | institute patent litigation against any entity (including a 82 | cross-claim or counterclaim in a lawsuit) alleging that the Work 83 | or a Contribution incorporated within the Work constitutes direct 84 | or contributory patent infringement, then any patent licenses 85 | granted to You under this License for that Work shall terminate 86 | as of the date such litigation is filed. 87 | 88 | 4. Redistribution. 
You may reproduce and distribute copies of the 89 | Work or Derivative Works thereof in any medium, with or without 90 | modifications, and in Source or Object form, provided that You 91 | meet the following conditions: 92 | 93 | (a) You must give any other recipients of the Work or 94 | Derivative Works a copy of this License; and 95 | 96 | (b) You must cause any modified files to carry prominent notices 97 | stating that You changed the files; and 98 | 99 | (c) You must retain, in the Source form of any Derivative Works 100 | that You distribute, all copyright, patent, trademark, and 101 | attribution notices from the Source form of the Work, 102 | excluding those notices that do not pertain to any part of 103 | the Derivative Works; and 104 | 105 | (d) If the Work includes a "NOTICE" text file as part of its 106 | distribution, then any Derivative Works that You distribute must 107 | include a readable copy of the attribution notices contained 108 | within such NOTICE file, excluding those notices that do not 109 | pertain to any part of the Derivative Works, in at least one 110 | of the following places: within a NOTICE text file distributed 111 | as part of the Derivative Works; within the Source form or 112 | documentation, if provided along with the Derivative Works; or, 113 | within a display generated by the Derivative Works, if and 114 | wherever such third-party notices normally appear. The contents 115 | of the NOTICE file are for informational purposes only and 116 | do not modify the License. You may add Your own attribution 117 | notices within Derivative Works that You distribute, alongside 118 | or as an addendum to the NOTICE text from the Work, provided 119 | that such additional attribution notices cannot be construed 120 | as modifying the License. 
121 | 122 | You may add Your own copyright statement to Your modifications and 123 | may provide additional or different license terms and conditions 124 | for use, reproduction, or distribution of Your modifications, or 125 | for any such Derivative Works as a whole, provided Your use, 126 | reproduction, and distribution of the Work otherwise complies with 127 | the conditions stated in this License. 128 | 129 | 5. Submission of Contributions. Unless You explicitly state otherwise, 130 | any Contribution intentionally submitted for inclusion in the Work 131 | by You to the Licensor shall be under the terms and conditions of 132 | this License, without any additional terms or conditions. 133 | Notwithstanding the above, nothing herein shall supersede or modify 134 | the terms of any separate license agreement you may have executed 135 | with Licensor regarding such Contributions. 136 | 137 | 6. Trademarks. This License does not grant permission to use the trade 138 | names, trademarks, service marks, or product names of the Licensor, 139 | except as required for reasonable and customary use in describing the 140 | origin of the Work and reproducing the content of the NOTICE file. 141 | 142 | 7. Disclaimer of Warranty. Unless required by applicable law or 143 | agreed to in writing, Licensor provides the Work (and each 144 | Contributor provides its Contributions) on an "AS IS" BASIS, 145 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 146 | implied, including, without limitation, any warranties or conditions 147 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 148 | PARTICULAR PURPOSE. You are solely responsible for determining the 149 | appropriateness of using or redistributing the Work and assume any 150 | risks associated with Your exercise of permissions under this License. 151 | 152 | 8. Limitation of Liability. 
In no event and under no legal theory, 153 | whether in tort (including negligence), contract, or otherwise, 154 | unless required by applicable law (such as deliberate and grossly 155 | negligent acts) or agreed to in writing, shall any Contributor be 156 | liable to You for damages, including any direct, indirect, special, 157 | incidental, or consequential damages of any character arising as a 158 | result of this License or out of the use or inability to use the 159 | Work (including but not limited to damages for loss of goodwill, 160 | work stoppage, computer failure or malfunction, or any and all 161 | other commercial damages or losses), even if such Contributor 162 | has been advised of the possibility of such damages. 163 | 164 | 9. Accepting Warranty or Additional Liability. While redistributing 165 | the Work or Derivative Works thereof, You may choose to offer, 166 | and charge a fee for, acceptance of support, warranty, indemnity, 167 | or other liability obligations and/or rights consistent with this 168 | License. However, in accepting such obligations, You may act only 169 | on Your own behalf and on Your sole responsibility, not on behalf 170 | of any other Contributor, and only if You agree to indemnify, 171 | defend, and hold each Contributor harmless for any liability 172 | incurred by, or claims asserted against, such Contributor by reason 173 | of your accepting any such warranty or additional liability. 174 | 175 | END OF TERMS AND CONDITIONS 176 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | pywb-remote-browsers 2 | Copyright 2021 Webrecorder Software and Contributors. 3 | 4 | Distributed under the Apache License 2.0. 5 | See LICENSE for details. 
6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pywb Remote Browsers 2 | 3 | This repository provides a simple configuration for deploying any [pywb](https://github.com/webrecorder/pywb) 4 | with remote browsers provided by [OWT/Shepherd Remote Browser System](https://github.com/oldweb-today/shepherd) 5 | 6 | To use: 7 | 8 | 1) Clone this repo 9 | 10 | 2) Add new or existing collections to the `collections` directory, eg. by running [wb-manager](https://pywb.readthedocs.io/en/latest/manual/apps.html#wb-manager) 11 | or by copying an [existing pywb collections directory](https://pywb.readthedocs.io/en/latest/manual/configuring.html#directory-structure) into the local collections directory. 12 | 13 | 3) Pull one or more Browser Docker Images from: https://hub.docker.com/r/oldwebtoday/chrome/tags or https://hub.docker.com/r/oldwebtoday/firefox/tags 14 | 15 | **To use a browser with Flash Support, `chrome:84` is recommended, eg. `docker pull oldwebtoday/chrome:84`** 16 | (See below for more info). 17 | 18 | 4) Run `docker-compose up -d` to start. 19 | 20 | 5) Go to `http://localhost:9020/` 21 | 22 | ### Browsing UI 23 | 24 | The default UI provides a set of controls for entering a URL and timestamp, choosing a browser, and choosing a collection. The system will then start the selected remote browser connected to that pywb collection in an iframe. 25 | 26 | 27 | 28 | 29 | ### Viewer UI 30 | 31 | To load the remote browser directly without the iframe or sidebar controls, you can also visit `http://localhost:9020////`. 32 | 33 | 34 | ## Getting Remote Browser Images 35 | 36 | The Browser Images are downloaded manually, to allow users to choose which browsers they want to run. 
37 | 38 | Browsers can be obtained from the oldwebtoday browser repos including: https://hub.docker.com/r/oldwebtoday/chrome/tags and https://hub.docker.com/r/oldwebtoday/firefox/tags. 39 | 40 | At least one browser must be downloaded for the system to work, eg. `docker pull oldwebtoday/chrome:84`. 41 | 42 | ## Flash and Java support 43 | 44 | A key benefit of the remote browser system is the ability to run older browsers, such as those supporting Flash and Java. 45 | 46 | Here are the latest versions of Chrome and Firefox that we have available that support Flash and Java: 47 | 48 | - Flash: Chrome 84 (`chrome:84`) or Firefox 68 (`firefox:68`) 49 | - Java: Firefox 49 (`firefox:49`) 50 | 51 | Additional browsers will (hopefully) be added in the future! 52 | 53 | ## Adding pywb collections 54 | 55 | pywb collections can be added while the system is running, and will be recognized on refresh, for example: 56 | 57 | ``` 58 | wb-manager init new-coll 59 | wb-manager add new-coll some-warc-files.warc 60 | ``` 61 | 62 | The `new-coll` should now appear in the collection dropdown. 63 | 64 | 65 | ## Audio 66 | 67 | The system is configured with audio support via WebRTC, using a TURN server operating on port 33478. 68 | This port must be accessible if connecting remotely. 69 | 70 | TODO Add docs on configuring over SSL via nginx proxying. 71 | 72 | ## Recording Mode 73 | 74 | The system also supports recording or capturing into new collections, using the [pywb Recording mode](https://pywb.readthedocs.io/en/latest/manual/configuring.html#recording-mode) 75 | 76 | To enable recording mode, set `ALLOW_RECORD=1` in `docker-compose.yml`. 77 | 78 | When enabled, the UI will show a recording checkbox. If checked, a new browser will be started in recording mode. 79 | 80 | This can be used to record existing content, such as Flash or even Java content, using an older browser. 
81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /assets/screenshot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/pywb-remote-browsers/4655806d75d7385eebb27965ff301c3dcb2b9fff/assets/screenshot1.png -------------------------------------------------------------------------------- /collections/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | debug: true 2 | 3 | proxy: 4 | coll: all 5 | enable_content_rewrite: false 6 | enable_banner: false 7 | enable_wombat: true 8 | 9 | ca_name: pywb OWT CA 10 | 11 | # enable for recording 12 | # below can be commented out if not recording 13 | recorder: live 14 | 15 | collections: 16 | live: $live 17 | 18 | autoindex: 30 19 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | services: 4 | shepherd: 5 | image: oldwebtoday/shepherd:1.2.5-beta.1 6 | environment: 7 | - BROWSER_NET=pywb-remote-browsers_browsers 8 | - PROXY_HOST=pywb 9 | - PROXY_PORT=8080 10 | # set to 1 to allow recording 11 | - ALLOW_RECORD=0 12 | 13 | depends_on: 14 | - redis 15 | 16 | volumes: 17 | - /var/run/docker.sock:/var/run/docker.sock 18 | - ./collections:/app/collections 19 | 20 | ports: 21 | - 9020:9020 22 | 23 | redis: 24 | image: redis:3.2.4 25 | networks: 26 | - browsers 27 | - default 28 | 29 | pywb: 30 | image: webrecorder/pywb:2.6.0b0 31 | networks: 32 | - browsers 33 | 34 | environment: 35 | - REDIS_URL=redis://redis/0 36 | - VOLUME_DIR=/app 37 | 38 | working_dir: /app 39 | 40 | volumes: 41 | - 
from gevent.monkey import patch_all; patch_all()

from pywb.apps.frontendapp import FrontEndApp
from warcio.timeutils import http_date_to_datetime, timestamp_now

from tempfile import SpooledTemporaryFile

import os
import redis
import logging
import traceback
import re

logger = logging.getLogger(__name__)


# ============================================================================
class RBSProxyApp(FrontEndApp):
    """pywb front-end app that picks the proxy route per requesting client.

    For every proxied request, the client's address (``REMOTE_ADDR``) is used
    to look up a Redis hash named ``up:<addr>``. The hash fields
    ``timestamp``, ``coll`` and ``mode`` decide which collection the request
    is routed to, the default timestamp, and whether the ``/record``
    sub-route is used.

    NOTE(review): the ``up:<addr>`` hashes are presumably written by the
    Shepherd remote browser manager when a browser is started -- confirm
    against that project.
    """

    def __init__(self, config_file=None, custom_config=None):
        """Create the app and connect to Redis.

        :param config_file: path of the pywb config file; when not given,
            ``./config.yaml`` is used.
        :param custom_config: passed through unchanged to ``FrontEndApp``.
        :raises KeyError: if the ``REDIS_URL`` environment variable is unset.
        """
        # Previously the ``config_file`` argument was accepted but ignored;
        # honor it and keep './config.yaml' as the default.
        super(RBSProxyApp, self).__init__(config_file=config_file or './config.yaml',
                                          custom_config=custom_config)

        # decode_responses=True so hash values come back as str, not bytes,
        # which the string comparison / concatenation below relies on.
        self.redis = redis.StrictRedis.from_url(os.environ['REDIS_URL'], decode_responses=True)

    def proxy_route_request(self, url, environ):
        """Return the internal route that should serve a proxied ``url``.

        :param url: the URL requested through the proxy.
        :param environ: the WSGI environ of the request. On a successful
            lookup, ``pywb_proxy_default_timestamp`` is set in it.
        :returns: ``/<coll>/bn_/<url>`` (``<coll>`` gets a ``/record``
            suffix when the stored mode is ``record``), or
            ``self.proxy_prefix + url`` when no collection is registered
            for the client or the lookup fails.
        """
        try:
            key = 'up:' + environ['REMOTE_ADDR']

            timestamp, coll, mode = self.redis.hmget(key, ['timestamp', 'coll', 'mode'])
            logger.debug('proxy lookup %s: timestamp=%s coll=%s mode=%s',
                         key, timestamp, coll, mode)

            # hmget yields None for missing fields. Without this guard an
            # unknown client was routed to the non-existent '/None/bn_/...'.
            if not coll:
                return self.proxy_prefix + url

            environ['pywb_proxy_default_timestamp'] = timestamp or timestamp_now()

            if mode == 'record':
                coll += '/record'

            route = '/{0}/bn_/'.format(coll) + url
            logger.debug('proxy route: %s', route)
            return route
        except Exception:
            # Best effort: any failure (Redis down, REMOTE_ADDR missing, ...)
            # falls back to the default proxy route instead of failing the
            # request, but is logged with its traceback.
            logger.exception('proxy route lookup failed for %s', url)
            return self.proxy_prefix + url


#=============================================================================
application = RBSProxyApp()
-------------------------------------------------------------------------------- /uwsgi.ini: -------------------------------------------------------------------------------- 1 | 2 | [uwsgi] 3 | if-not-env = PORT 4 | http-socket = :8080 5 | socket = :8081 6 | endif = 7 | 8 | master = true 9 | buffer-size = 65536 10 | die-on-term = true 11 | 12 | if-env = VIRTUAL_ENV 13 | venv = $(VIRTUAL_ENV) 14 | endif = 15 | 16 | gevent = 100 17 | 18 | # specify config file here 19 | #env = PYWB_CONFIG_FILE=/app/config.yaml 20 | wsgi = rbsapp 21 | --------------------------------------------------------------------------------