├── .gitignore ├── CHANGELOG.md ├── README.md ├── example ├── .dockerignore ├── Dockerfile ├── README.md ├── docker-compose.yaml ├── example │ ├── __init__.py │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ ├── book.py │ │ ├── movie.py │ │ ├── pretend_test.py │ │ ├── sports.py │ │ └── test_proxy.py ├── requirements.txt ├── run.py └── scrapy.cfg ├── gerapy_pyppeteer ├── __init__.py ├── __version__.py ├── downloadermiddlewares.py ├── pretend.py ├── request.py └── settings.py ├── requirements.txt ├── setup.py └── tests └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | .DS_Store 7 | .idea/ 8 | .vscode/ 9 | 10 | # User-specific stuff 11 | .idea/**/workspace.xml 12 | .idea/**/tasks.xml 13 | .idea/**/usage.statistics.xml 14 | .idea/**/dictionaries 15 | .idea/**/shelf 16 | 17 | # Generated files 18 | .idea/**/contentModel.xml 19 | 20 | # Sensitive or high-churn files 21 | .idea/**/dataSources/ 22 | .idea/**/dataSources.ids 23 | .idea/**/dataSources.local.xml 24 | .idea/**/sqlDataSources.xml 25 | .idea/**/dynamic.xml 26 | .idea/**/uiDesigner.xml 27 | .idea/**/dbnavigator.xml 28 | 29 | # Gradle 30 | .idea/**/gradle.xml 31 | .idea/**/libraries 32 | 33 | # Gradle and Maven with auto-import 34 | # When using Gradle or Maven with auto-import, you should exclude module files, 35 | # since they will be recreated, and may cause churn. Uncomment if using 36 | # auto-import. 
37 | # .idea/artifacts 38 | # .idea/compiler.xml 39 | # .idea/jarRepositories.xml 40 | # .idea/modules.xml 41 | # .idea/*.iml 42 | # .idea/modules 43 | # *.iml 44 | # *.ipr 45 | 46 | # CMake 47 | cmake-build-*/ 48 | 49 | # Mongo Explorer plugin 50 | .idea/**/mongoSettings.xml 51 | 52 | # File-based project format 53 | *.iws 54 | 55 | # IntelliJ 56 | out/ 57 | 58 | # mpeltonen/sbt-idea plugin 59 | .idea_modules/ 60 | 61 | # JIRA plugin 62 | atlassian-ide-plugin.xml 63 | 64 | # Cursive Clojure plugin 65 | .idea/replstate.xml 66 | 67 | # Crashlytics plugin (for Android Studio and IntelliJ) 68 | com_crashlytics_export_strings.xml 69 | crashlytics.properties 70 | crashlytics-build.properties 71 | fabric.properties 72 | 73 | # Editor-based Rest Client 74 | .idea/httpRequests 75 | 76 | # Android studio 3.1+ serialized cache file 77 | .idea/caches/build_file_checksums.ser 78 | 79 | ### Python template 80 | # Byte-compiled / optimized / DLL files 81 | __pycache__/ 82 | *.py[cod] 83 | *$py.class 84 | 85 | # C extensions 86 | *.so 87 | 88 | # Distribution / packaging 89 | .Python 90 | build/ 91 | develop-eggs/ 92 | dist/ 93 | downloads/ 94 | eggs/ 95 | .eggs/ 96 | lib/ 97 | lib64/ 98 | parts/ 99 | sdist/ 100 | var/ 101 | wheels/ 102 | pip-wheel-metadata/ 103 | share/python-wheels/ 104 | *.egg-info/ 105 | .installed.cfg 106 | *.egg 107 | MANIFEST 108 | 109 | # PyInstaller 110 | # Usually these files are written by a python script from a template 111 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
112 | *.manifest 113 | *.spec 114 | 115 | # Installer logs 116 | pip-log.txt 117 | pip-delete-this-directory.txt 118 | 119 | # Unit test / coverage reports 120 | htmlcov/ 121 | .tox/ 122 | .nox/ 123 | .coverage 124 | .coverage.* 125 | .cache 126 | nosetests.xml 127 | coverage.xml 128 | *.cover 129 | *.py,cover 130 | .hypothesis/ 131 | .pytest_cache/ 132 | cover/ 133 | 134 | # Translations 135 | *.mo 136 | *.pot 137 | 138 | # Django stuff: 139 | *.log 140 | local_settings.py 141 | db.sqlite3 142 | db.sqlite3-journal 143 | 144 | # Flask stuff: 145 | instance/ 146 | .webassets-cache 147 | 148 | # Scrapy stuff: 149 | .scrapy 150 | 151 | # Sphinx documentation 152 | docs/_build/ 153 | 154 | # PyBuilder 155 | .pybuilder/ 156 | target/ 157 | 158 | # Jupyter Notebook 159 | .ipynb_checkpoints 160 | 161 | # IPython 162 | profile_default/ 163 | ipython_config.py 164 | 165 | # pyenv 166 | # For a library or package, you might want to ignore these files since the code is 167 | # intended to run in multiple environments; otherwise, check them in: 168 | # .python-version 169 | 170 | # pipenv 171 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 172 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 173 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 174 | # install all needed dependencies. 175 | #Pipfile.lock 176 | 177 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 178 | __pypackages__/ 179 | 180 | # Celery stuff 181 | celerybeat-schedule 182 | celerybeat.pid 183 | 184 | # SageMath parsed files 185 | *.sage.py 186 | 187 | # Environments 188 | .env 189 | .venv 190 | env/ 191 | venv/ 192 | ENV/ 193 | env.bak/ 194 | venv.bak/ 195 | 196 | # Spyder project settings 197 | .spyderproject 198 | .spyproject 199 | 200 | # Rope project settings 201 | .ropeproject 202 | 203 | # mkdocs documentation 204 | /site 205 | 206 | # mypy 207 | .mypy_cache/ 208 | .dmypy.json 209 | dmypy.json 210 | 211 | # Pyre type checker 212 | .pyre/ 213 | 214 | # pytype static type analyzer 215 | .pytype/ 216 | 217 | # Cython debug symbols 218 | cython_debug/ 219 | 220 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Gerapy Pyppeteer Changelog 2 | 3 | ## 0.2.4 (2021-12-27) 4 | 5 | - Expose error log for loading process 6 | 7 | ## 0.2.3 (2021-10-16) 8 | 9 | - add support for proxy_credential 10 | - fix bug of network error and add retry 11 | 12 | ## 0.2.2 (2021-09-07) 13 | 14 | ### Features 15 | 16 | - add support for executing Python based functions 17 | - add support for returning script result 18 | 19 | ## 0.1.2 (2021-06-20) 20 | 21 | ### Bug Fixes & Features 22 | 23 | - change GERAPY_ENABLE_REQUEST_INTERCEPTION default to False 24 | 25 | ## 0.1.1 (2021-04-18) 26 | 27 | ### Bug Fixes & Features 28 | 29 | - fix pu_request.headers decode error 30 | - add ua setting 31 | 32 | ## 0.1.0 (2021-04-17) 33 | 34 | ### Bug Fixes & Features 35 | 36 | - fix pyppeteer_meta name error 37 | - add chrome pretend test 38 | - update pretend script & fix local pretend invalid 39 | 40 | ### Bug Fixes 41 | 42 | - Fix bug about executablePath arg name 43 | 44 | ## 0.0.13 (2020-10-30) 45 | 46 | ### Bug Fixes 47 | 48 | - Fix bug about executablePath arg name 49 | 50 | ## 0.0.12 (2020-10-24) 51 | 52 | ### Bug Fixes 53 | 
54 | - Fix bug about crawling with Pyppeteer of normal Request 55 | 56 | ## 0.0.11 (2020-08-05) 57 | 58 | ### Bug Fixes 59 | 60 | - Fix bug about `asyncio` in Python 3.8 on Windows [https://github.com/Gerapy/GerapyPyppeteer/issues/5](https://github.com/Gerapy/GerapyPyppeteer/issues/5) 61 | - Fix bug of setting cookies [https://github.com/Gerapy/GerapyPyppeteer/issues/11](https://github.com/Gerapy/GerapyPyppeteer/issues/11) 62 | 63 | ### Features 64 | 65 | - Add settings of `GERAPY_ENABLE_REQUEST_INTERCEPTION` [https://github.com/Gerapy/GerapyPyppeteer/issues/6](https://github.com/Gerapy/GerapyPyppeteer/issues/6) 66 | 67 | ## 0.0.10 (2020-08-01) 68 | 69 | ### Features 70 | 71 | - Add `pretend` attribute for `PyppeteerRequest`, which can override `GERAPY_PYPPETEER_PRETEND` 72 | - Add support for `dict` format of `wait_for` attribute of `PyppeteerRequest` 73 | 74 | ### Bug Fixes 75 | 76 | - Change the priority of `request.meta.get('proxy')` and `pyppeteer_meta.get('proxy')` 77 | 78 | ## 0.0.9 (2020-07-31) 79 | 80 | ### Features 81 | 82 | - Add support for screenshot 83 | 84 | ### Bug Fixes 85 | 86 | - Fix bug of name `GERAPY_PYPPETEER_IGNORE_RESOURCE_TYPES` 87 | 88 | ## 0.0.8 (2020-07-26) 89 | 90 | ### Features 91 | 92 | - Add support for pretending as real Browser instead of WebDriver 93 | 94 | ### Bug Fixes 95 | 96 | - Fix bug of ValueError when `wait_until` is None 97 | - Fix error position of log message about `wait_for` 98 | 99 | ## 0.0.7 (2020-07-25) 100 | 101 | ### Features 102 | 103 | - Add meta info from PyppeteerRequest attributes 104 | 105 | ### Bug Fixes 106 | 107 | - Skip validation of PyppeteerRequest 108 | 109 | ## 0.0.5 (2020-07-20) 110 | 111 | ### Features 112 | 113 | - Add support for `ignoreHTTPSErrors`, `slowMo`, `ignoreDefaultArgs`, 114 | `handleSIGINT`, `handleSIGTERM`, `handleSIGHUP`, `autoClose` args. 
115 | 116 | ## 0.0.4 (2020-07-15) 117 | 118 | ### Bug Fixes 119 | 120 | - Fix Bug of un-closing Pyppeteer when loaded failed 121 | 122 | ### Features 123 | 124 | - Add support for `GERAPY_IGNORE_RESOURCE_TYPES` 125 | - Add support for retrying 126 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gerapy Pyppeteer 2 | 3 | This is a package for supporting pyppeteer in Scrapy, also this 4 | package is a module in [Gerapy](https://github.com/Gerapy/Gerapy). 5 | 6 | ## Installation 7 | 8 | ```shell script 9 | pip3 install gerapy-pyppeteer 10 | ``` 11 | 12 | ## Usage 13 | 14 | You can use `PyppeteerRequest` to specify a request which uses pyppeteer to render. 15 | 16 | For example: 17 | 18 | ```python 19 | yield PyppeteerRequest(detail_url, callback=self.parse_detail) 20 | ``` 21 | 22 | And you also need to enable `PyppeteerMiddleware` in `DOWNLOADER_MIDDLEWARES`: 23 | 24 | ```python 25 | DOWNLOADER_MIDDLEWARES = { 26 | 'gerapy_pyppeteer.downloadermiddlewares.PyppeteerMiddleware': 543, 27 | } 28 | ``` 29 | 30 | Congratulations, you've finished all of the required configuration. 31 | 32 | If you run the Spider again, Pyppeteer will be started to render every 33 | web page whose request you configured as a `PyppeteerRequest`. 34 | 35 | ## Settings 36 | 37 | GerapyPyppeteer provides some optional settings. 38 | 39 | ### Concurrency 40 | 41 | You can directly use Scrapy's setting to set Concurrency of Pyppeteer, 42 | for example: 43 | 44 | ```python 45 | CONCURRENT_REQUESTS = 3 46 | ``` 47 | 48 | ### Pretend as Real Browser 49 | 50 | Some websites detect WebDriver or Headless mode; GerapyPyppeteer can 51 | pretend to be a real Chromium browser by injecting scripts. This is enabled by default. 
52 | 53 | You can disable it to speed things up if the website does not detect WebDriver: 54 | 55 | ```python 56 | GERAPY_PYPPETEER_PRETEND = False 57 | ``` 58 | 59 | Also you can use the `pretend` attribute in `PyppeteerRequest` to override this 60 | configuration. 61 | 62 | ### Logging Level 63 | 64 | By default, Pyppeteer will log all the debug messages, so GerapyPyppeteer 65 | configured the logging level of Pyppeteer to WARNING. 66 | 67 | If you want to see more logs from Pyppeteer, you can change this setting: 68 | 69 | ```python 70 | import logging 71 | GERAPY_PYPPETEER_LOGGING_LEVEL = logging.DEBUG 72 | ``` 73 | 74 | ### Download Timeout 75 | 76 | Pyppeteer may take some time to render the required web page, you can also change this setting, default is `30s`: 77 | 78 | ```python 79 | # pyppeteer timeout 80 | GERAPY_PYPPETEER_DOWNLOAD_TIMEOUT = 30 81 | ``` 82 | 83 | ### Headless 84 | 85 | By default, Pyppeteer is running in `Headless` mode, you can also 86 | change it to `False` as you need, default is `True`: 87 | 88 | ```python 89 | GERAPY_PYPPETEER_HEADLESS = False 90 | ``` 91 | 92 | ### Window Size 93 | 94 | You can also set the width and height of Pyppeteer window: 95 | 96 | ```python 97 | GERAPY_PYPPETEER_WINDOW_WIDTH = 1400 98 | GERAPY_PYPPETEER_WINDOW_HEIGHT = 700 99 | ``` 100 | 101 | Default is 1400, 700. 102 | 103 | ### Proxy 104 | 105 | You can set a proxy via the config below: 106 | 107 | ```python 108 | GERAPY_PYPPETEER_PROXY = 'http://tps254.kdlapi.com:15818' 109 | GERAPY_PYPPETEER_PROXY_CREDENTIAL = { 110 | 'username': 'xxx', 111 | 'password': 'xxxx' 112 | } 113 | ``` 114 | 115 | ### Pyppeteer Args 116 | 117 | You can also change the args of Pyppeteer, such as `dumpio`, `devtools`, etc. 
118 | 119 | Optional settings and their default values: 120 | 121 | ```python 122 | GERAPY_PYPPETEER_DUMPIO = False 123 | GERAPY_PYPPETEER_DEVTOOLS = False 124 | GERAPY_PYPPETEER_EXECUTABLE_PATH = None 125 | GERAPY_PYPPETEER_DISABLE_EXTENSIONS = True 126 | GERAPY_PYPPETEER_HIDE_SCROLLBARS = True 127 | GERAPY_PYPPETEER_MUTE_AUDIO = True 128 | GERAPY_PYPPETEER_NO_SANDBOX = True 129 | GERAPY_PYPPETEER_DISABLE_SETUID_SANDBOX = True 130 | GERAPY_PYPPETEER_DISABLE_GPU = True 131 | ``` 132 | 133 | ### Disable loading of specific resource type 134 | 135 | You can disable the loading of specific resource type to 136 | decrease the loading time of web page. You can configure 137 | the disabled resource types using `GERAPY_PYPPETEER_IGNORE_RESOURCE_TYPES`: 138 | 139 | ```python 140 | GERAPY_PYPPETEER_IGNORE_RESOURCE_TYPES = [] 141 | ``` 142 | 143 | For example, if you want to disable the loading of css and javascript, 144 | you can set as below: 145 | 146 | ```python 147 | GERAPY_PYPPETEER_IGNORE_RESOURCE_TYPES = ['stylesheet', 'script'] 148 | ``` 149 | 150 | All of the optional resource type list: 151 | 152 | - document: the Original HTML document 153 | - stylesheet: CSS files 154 | - script: JavaScript files 155 | - image: Images 156 | - media: Media files such as audios or videos 157 | - font: Fonts files 158 | - texttrack: Text Track files 159 | - xhr: Ajax Requests 160 | - fetch: Fetch Requests 161 | - eventsource: Event Source 162 | - websocket: Websocket 163 | - manifest: Manifest files 164 | - other: Other files 165 | 166 | ### Screenshot 167 | 168 | You can get screenshot of loaded page, you can pass `screenshot` args to `PyppeteerRequest` as dict: 169 | 170 | - `type` (str): Specify screenshot type, can be either `jpeg` or `png`. Defaults to `png`. 171 | - `quality` (int): The quality of the image, between 0-100. Not applicable to `png` image. 172 | - `fullPage` (bool): When true, take a screenshot of the full scrollable page. Defaults to `False`. 
173 | - `clip` (dict): An object which specifies clipping region of the page. This option should have the following fields: 174 | - `x` (int): x-coordinate of top-left corner of clip area. 175 | - `y` (int): y-coordinate of top-left corner of clip area. 176 | - `width` (int): width of clipping area. 177 | - `height` (int): height of clipping area. 178 | - `omitBackground` (bool): Hide default white background and allow capturing screenshot with transparency. 179 | - `encoding` (str): The encoding of the image, can be either `base64` or `binary`. Defaults to `binary`. If binary it will return `BytesIO` object. 180 | 181 | For example: 182 | 183 | ```python 184 | yield PyppeteerRequest(start_url, callback=self.parse_index, wait_for='.item .name', screenshot={ 185 | 'type': 'png', 186 | 'fullPage': True 187 | }) 188 | ``` 189 | 190 | Then you can get the screenshot result from `response.meta['screenshot']`. 191 | 192 | The simplest way is to save it to a file: 193 | 194 | ```python 195 | def parse_index(self, response): 196 | with open('screenshot.png', 'wb') as f: 197 | f.write(response.meta['screenshot'].getbuffer()) 198 | ``` 199 | 200 | If you want to enable screenshot for all requests, you can configure it by `GERAPY_PYPPETEER_SCREENSHOT`. 201 | 202 | For example: 203 | 204 | ```python 205 | GERAPY_PYPPETEER_SCREENSHOT = { 206 | 'type': 'png', 207 | 'fullPage': True 208 | } 209 | ``` 210 | 211 | ## PyppeteerRequest 212 | 213 | `PyppeteerRequest` provides args which can override global settings above. 214 | 215 | - url: request url 216 | - callback: callback 217 | - wait_until: one of "load", "domcontentloaded", "networkidle0", "networkidle2". 
218 | see https://miyakogi.github.io/pyppeteer/reference.html#pyppeteer.page.Page.goto, default is `domcontentloaded` 219 | - wait_for: wait for some element to load, also supports dict 220 | - script: script to execute 221 | - actions: actions defined for execution of Page object 222 | - proxy: use proxy for this time, like `http://x.x.x.x:x` 223 | - proxy_credential: the proxy credential, like `{'username': 'xxxx', 'password': 'xxxx'}` 224 | - sleep: time to sleep after loaded, override `GERAPY_PYPPETEER_SLEEP` 225 | - timeout: load timeout, override `GERAPY_PYPPETEER_DOWNLOAD_TIMEOUT` 226 | - ignore_resource_types: ignored resource types, override `GERAPY_PYPPETEER_IGNORE_RESOURCE_TYPES` 227 | - pretend: pretend as normal browser, override `GERAPY_PYPPETEER_PRETEND` 228 | - screenshot: screenshot options, see 229 | https://miyakogi.github.io/pyppeteer/_modules/pyppeteer/page.html#Page.screenshot, 230 | override `GERAPY_PYPPETEER_SCREENSHOT` 231 | 232 | For example, you can configure PyppeteerRequest as: 233 | 234 | ```python 235 | from gerapy_pyppeteer import PyppeteerRequest 236 | 237 | def parse(self, response): 238 | yield PyppeteerRequest(url, 239 | callback=self.parse_detail, 240 | wait_until='domcontentloaded', 241 | wait_for='title', 242 | script='() => { return {name: "Germey"} }', 243 | sleep=2) 244 | ``` 245 | 246 | Then Pyppeteer will: 247 | 248 | - wait for document to load 249 | - wait for title to load 250 | - execute the given `script` 251 | - sleep for 2s 252 | - return the rendered web page content, get from `response.meta['screenshot']` 253 | - return the script executed result, get from `response.meta['script_result']` 254 | 255 | For waiting mechanism controlled by JavaScript, you can use await in `script`, for example: 256 | 257 | ```python 258 | js = '''async () => { 259 | await new Promise(resolve => setTimeout(resolve, 10000)); 260 | return { 261 | 'name': 'Germey' 262 | } 263 | } 264 | ''' 265 | yield 
PyppeteerRequest(url, callback=self.parse, script=js) 266 | ``` 267 | 268 | Then you can get the script result from `response.meta['script_result']`, result is `{'name': 'Germey'}`. 269 | 270 | If you think the JavaScript is weird to write, you can use the `actions` argument to define a `Python` based function to execute, for example: 271 | 272 | ```python 273 | async def execute_actions(page: Page): 274 | await page.evaluate('() => { document.title = "Hello World"; }') 275 | return 1 276 | yield PyppeteerRequest(url, callback=self.parse, actions=execute_actions) 277 | ``` 278 | 279 | Then you can get the actions result from `response.meta['actions_result']`, result is `1`. 280 | 281 | Also you can define proxy and proxy_credential for each Request, for example: 282 | 283 | ```python 284 | yield PyppeteerRequest( 285 | self.base_url, 286 | callback=self.parse_index, 287 | priority=10, 288 | proxy='http://tps254.kdlapi.com:15818', 289 | proxy_credential={ 290 | 'username': 'xxxx', 291 | 'password': 'xxxx' 292 | }) 293 | ``` 294 | 295 | `proxy` and `proxy_credential` will override the settings `GERAPY_PYPPETEER_PROXY` and `GERAPY_PYPPETEER_PROXY_CREDENTIAL`. 296 | 297 | ## Example 298 | 299 | For more detail, please see [example](./example). 
300 | 301 | Also you can directly run with Docker: 302 | 303 | ``` 304 | docker run germey/gerapy-pyppeteer-example 305 | ``` 306 | 307 | Outputs: 308 | 309 | ```shell script 310 | 2020-07-13 01:49:13 [scrapy.utils.log] INFO: Scrapy 2.2.0 started (bot: example) 311 | 2020-07-13 01:49:13 [scrapy.utils.log] INFO: Versions: lxml 4.3.3.0, libxml2 2.9.9, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.7.7 (default, May 6 2020, 04:59:01) - [Clang 4.0.1 (tags/RELEASE_401/final)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1d 10 Sep 2019), cryptography 2.8, Platform Darwin-19.4.0-x86_64-i386-64bit 312 | 2020-07-13 01:49:13 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.asyncioreactor.AsyncioSelectorReactor 313 | 2020-07-13 01:49:13 [scrapy.crawler] INFO: Overridden settings: 314 | {'BOT_NAME': 'example', 315 | 'CONCURRENT_REQUESTS': 3, 316 | 'NEWSPIDER_MODULE': 'example.spiders', 317 | 'RETRY_HTTP_CODES': [403, 500, 502, 503, 504], 318 | 'SPIDER_MODULES': ['example.spiders']} 319 | 2020-07-13 01:49:13 [scrapy.extensions.telnet] INFO: Telnet Password: 83c276fb41754bd0 320 | 2020-07-13 01:49:13 [scrapy.middleware] INFO: Enabled extensions: 321 | ['scrapy.extensions.corestats.CoreStats', 322 | 'scrapy.extensions.telnet.TelnetConsole', 323 | 'scrapy.extensions.memusage.MemoryUsage', 324 | 'scrapy.extensions.logstats.LogStats'] 325 | 2020-07-13 01:49:13 [scrapy.middleware] INFO: Enabled downloader middlewares: 326 | ['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware', 327 | 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware', 328 | 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware', 329 | 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware', 330 | 'gerapy_pyppeteer.downloadermiddlewares.PyppeteerMiddleware', 331 | 'scrapy.downloadermiddlewares.retry.RetryMiddleware', 332 | 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware', 333 | 
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware', 334 | 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware', 335 | 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware', 336 | 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware', 337 | 'scrapy.downloadermiddlewares.stats.DownloaderStats'] 338 | 2020-07-13 01:49:13 [scrapy.middleware] INFO: Enabled spider middlewares: 339 | ['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware', 340 | 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware', 341 | 'scrapy.spidermiddlewares.referer.RefererMiddleware', 342 | 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware', 343 | 'scrapy.spidermiddlewares.depth.DepthMiddleware'] 344 | 2020-07-13 01:49:13 [scrapy.middleware] INFO: Enabled item pipelines: 345 | [] 346 | 2020-07-13 01:49:13 [scrapy.core.engine] INFO: Spider opened 347 | 2020-07-13 01:49:13 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 348 | 2020-07-13 01:49:13 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023 349 | 2020-07-13 01:49:13 [example.spiders.book] INFO: crawling https://dynamic5.scrape.center/page/1 350 | 2020-07-13 01:49:13 [gerapy.pyppeteer] DEBUG: processing request 351 | 2020-07-13 01:49:13 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']} 352 | 2020-07-13 01:49:14 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/page/1 353 | 2020-07-13 01:49:19 [gerapy.pyppeteer] DEBUG: waiting for .item .name finished 354 | 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: wait for .item .name finished 355 | 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: close pyppeteer 356 | 2020-07-13 01:49:20 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) 357 | 2020-07-13 01:49:20 
[gerapy.pyppeteer] DEBUG: processing request 358 | 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: processing request 359 | 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: processing request 360 | 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']} 361 | 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']} 362 | 2020-07-13 01:49:21 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']} 363 | 2020-07-13 01:49:21 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/detail/26855315 364 | 2020-07-13 01:49:21 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/detail/26861389 365 | 2020-07-13 01:49:21 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/detail/26898909 366 | 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: waiting for .item .name finished 367 | 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: wait for .item .name finished 368 | 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: close pyppeteer 369 | 2020-07-13 01:49:24 [scrapy.core.engine] DEBUG: Crawled (200) (referer: https://dynamic5.scrape.center/page/1) 370 | 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: processing request 371 | 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', 
'--disable-setuid-sandbox', '--disable-gpu']} 372 | 2020-07-13 01:49:25 [scrapy.core.scraper] DEBUG: Scraped from <200 https://dynamic5.scrape.center/detail/26861389> 373 | {'name': '壁穴ヘブンホール', 374 | 'score': '5.6', 375 | 'tags': ['BL漫画', '小基漫', 'BL', '『又腐又基』', 'BLコミック']} 376 | 2020-07-13 01:49:25 [gerapy.pyppeteer] DEBUG: waiting for .item .name finished 377 | 2020-07-13 01:49:25 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/page/2 378 | 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: wait for .item .name finished 379 | 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: close pyppeteer 380 | 2020-07-13 01:49:26 [scrapy.core.engine] DEBUG: Crawled (200) (referer: https://dynamic5.scrape.center/page/1) 381 | 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: processing request 382 | 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']} 383 | 2020-07-13 01:49:26 [scrapy.core.scraper] DEBUG: Scraped from <200 https://dynamic5.scrape.center/detail/26855315> 384 | {'name': '冒险小虎队', 'score': '9.4', 'tags': ['冒险小虎队', '童年', '冒险', '推理', '小时候读的']} 385 | 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: waiting for .item .name finished 386 | 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/detail/27047626 387 | 2020-07-13 01:49:27 [gerapy.pyppeteer] DEBUG: wait for .item .name finished 388 | 2020-07-13 01:49:27 [gerapy.pyppeteer] DEBUG: close pyppeteer 389 | ... 390 | ``` 391 | 392 | ## Trouble Shooting 393 | 394 | ### Pyppeteer does not start properly 395 | 396 | Chromium download speed is too slow, it can not be used normally. 
397 | 398 | Here are two solutions: 399 | 400 | #### Solution 1 (Recommended) 401 | 402 | Modify the driver download source at `pyppeteer/chromium_downloader.py` line 22: 403 | 404 | ```python 405 | # Default: 406 | DEFAULT_DOWNLOAD_HOST = 'https://storage.googleapis.com' 407 | # modify 408 | DEFAULT_DOWNLOAD_HOST = 'http://npm.taobao.org/mirror' 409 | ``` 410 | 411 | #### Solution 2 412 | 413 | Modify the driver executable path at `pyppeteer/chromium_downloader.py` line 45: 414 | 415 | ```python 416 | # Default: 417 | chromiumExecutable = { 418 | 'linux': DOWNLOADS_FOLDER / REVISION / 'chrome-linux' / 'chrome', 419 | 'mac': (DOWNLOADS_FOLDER / REVISION / 'chrome-mac' / 'Chromium.app' / 420 | 'Contents' / 'MacOS' / 'Chromium'), 421 | 'win32': DOWNLOADS_FOLDER / REVISION / windowsArchive / 'chrome.exe', 422 | 'win64': DOWNLOADS_FOLDER / REVISION / windowsArchive / 'chrome.exe', 423 | } 424 | ``` 425 | 426 | Find the entry for your operating system and change it to the path of your own Chrome or Chromium executable. 427 | -------------------------------------------------------------------------------- /example/.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | 137 | # pytype static type analyzer 138 | .pytype/ 139 | 140 | # Cython debug symbols 141 | cython_debug/ 142 | 143 | -------------------------------------------------------------------------------- /example/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | 3 | RUN apt-get update && \ 4 | apt-get -y install libnss3 xvfb gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 \ 5 | libdbus-1-3 libexpat1 libfontconfig1 libgbm1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 \ 6 | libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 \ 7 | libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 \ 8 | libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget && \ 9 | rm -rf /var/lib/apt/lists/* 10 | 11 | RUN pip install -U pip && pip install pyppeteer && pyppeteer-install 12 | 13 | WORKDIR /code 14 | COPY requirements.txt . 15 | RUN pip install -r requirements.txt 16 | COPY . . 
17 | CMD python3 run.py 18 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | # Gerapy Pyppeteer Example 2 | 3 | ## Run 4 | 5 | There are two ways to run this example: 6 | 7 | ### Run with Python 8 | 9 | ```shell script 10 | pip3 install -r requirements.txt 11 | pyppeteer-install 12 | python3 run.py 13 | ``` 14 | 15 | ### Run with Docker 16 | 17 | ```shell script 18 | docker run germey/gerapy-pyppeteer-example 19 | ``` 20 | 21 | If you want to build your own docker image, please remember to set: 22 | 23 | ```python 24 | GERAPY_PYPPETEER_HEADLESS = True 25 | GERAPY_PYPPETEER_NO_SANDBOX = True  # default is True 26 | ``` 27 | 28 | In your settings.py file. 29 | 30 | Otherwise, it won't work well. 31 | -------------------------------------------------------------------------------- /example/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | gerapy-pyppeteer-example: 4 | container_name: 'gerapy-pyppeteer-example' 5 | restart: always 6 | build: . 
7 | image: 'germey/gerapy-pyppeteer-example' 8 | command: 'python3 run.py' -------------------------------------------------------------------------------- /example/example/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gerapy/GerapyPyppeteer/0143b2f862d3455e742c72aa541b0a6e89689627/example/example/__init__.py -------------------------------------------------------------------------------- /example/example/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # https://docs.scrapy.org/en/latest/topics/items.html 7 | 8 | from scrapy import Field, Item 9 | 10 | 11 | class BookItem(Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | name = Field() 15 | tags = Field() 16 | score = Field() 17 | 18 | 19 | class MovieItem(Item): 20 | # define the fields for your item here like: 21 | # name = scrapy.Field() 22 | name = Field() 23 | categories = Field() 24 | score = Field() 25 | -------------------------------------------------------------------------------- /example/example/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class ExampleSpiderMiddleware: 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 
19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(self, response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(self, response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Request, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | 58 | 59 | class ExampleDownloaderMiddleware: 60 | # Not all methods need to be defined. If a method is not defined, 61 | # scrapy acts as if the downloader middleware does not modify the 62 | # passed objects. 63 | 64 | @classmethod 65 | def from_crawler(cls, crawler): 66 | # This method is used by Scrapy to create your spiders. 67 | s = cls() 68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 69 | return s 70 | 71 | def process_request(self, request, spider): 72 | # Called for each request that goes through the downloader 73 | # middleware. 
74 | 75 | # Must either: 76 | # - return None: continue processing this request 77 | # - or return a Response object 78 | # - or return a Request object 79 | # - or raise IgnoreRequest: process_exception() methods of 80 | # installed downloader middleware will be called 81 | return None 82 | 83 | def process_response(self, request, response, spider): 84 | # Called with the response returned from the downloader. 85 | 86 | # Must either; 87 | # - return a Response object 88 | # - return a Request object 89 | # - or raise IgnoreRequest 90 | return response 91 | 92 | def process_exception(self, request, exception, spider): 93 | # Called when a download handler or a process_request() 94 | # (from other downloader middleware) raises an exception. 95 | 96 | # Must either: 97 | # - return None: continue processing this exception 98 | # - return a Response object: stops process_exception() chain 99 | # - return a Request object: stops process_exception() chain 100 | pass 101 | 102 | def spider_opened(self, spider): 103 | spider.logger.info('Spider opened: %s' % spider.name) 104 | -------------------------------------------------------------------------------- /example/example/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class ExamplePipeline: 10 | def process_item(self, item, spider): 11 | return item 12 | -------------------------------------------------------------------------------- /example/example/settings.py: -------------------------------------------------------------------------------- 1 | BOT_NAME = 'example' 2 | 3 | SPIDER_MODULES = ['example.spiders'] 4 | NEWSPIDER_MODULE = 'example.spiders' 5 | 6 | # Obey robots.txt rules 7 | ROBOTSTXT_OBEY = False 8 | 9 | # Configure maximum concurrent 
requests performed by Scrapy (default: 16) 10 | CONCURRENT_REQUESTS = 1 11 | 12 | # Enable or disable downloader middlewares 13 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html 14 | DOWNLOADER_MIDDLEWARES = { 15 | 'gerapy_pyppeteer.downloadermiddlewares.PyppeteerMiddleware': 543, 16 | } 17 | 18 | RETRY_HTTP_CODES = [403, 500, 502, 503, 504] 19 | 20 | GERAPY_PYPPETEER_HEADLESS = False 21 | 22 | LOG_LEVEL = 'DEBUG' 23 | 24 | GERAPY_PYPPETEER_PRETEND = False 25 | 26 | GERAPY_PYPPETEER_SCREENSHOT = { 27 | 'type': 'png', 28 | 'fullPage': True 29 | } 30 | 31 | GERAPY_PYPPETEER_DOWNLOAD_TIMEOUT = 10 32 | -------------------------------------------------------------------------------- /example/example/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
5 | -------------------------------------------------------------------------------- /example/example/spiders/book.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | import scrapy 4 | from example.items import BookItem 5 | from gerapy_pyppeteer import PyppeteerRequest 6 | import logging 7 | from pyppeteer.page import Page 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | js = '''async () => { 13 | await new Promise(resolve => setTimeout(resolve, 10000)); 14 | return { 15 | 'name': 'Germey' 16 | } 17 | }''' 18 | 19 | 20 | async def execute_action(page: Page): 21 | await page.evaluate('() => { document.title = "Hello World"; }') 22 | return 1 23 | 24 | 25 | class BookSpider(scrapy.Spider): 26 | name = 'book' 27 | allowed_domains = ['spa5.scrape.center'] 28 | base_url = 'https://spa5.scrape.center' 29 | 30 | def start_requests(self): 31 | """ 32 | first page 33 | :return: 34 | """ 35 | start_url = f'{self.base_url}/page/1' 36 | logger.info('crawling %s', start_url) 37 | yield PyppeteerRequest(start_url, callback=self.parse_index, actions=execute_action, wait_for='.item .name', script=js) 38 | 39 | def parse_index(self, response): 40 | """ 41 | extract books and get next page 42 | :param response: 43 | :return: 44 | """ 45 | logger.debug('response meta %s', response.meta) 46 | items = response.css('.item') 47 | for item in items: 48 | href = item.css('.top a::attr(href)').extract_first() 49 | detail_url = response.urljoin(href) 50 | yield PyppeteerRequest(detail_url, callback=self.parse_detail, wait_for='.item .name') 51 | 52 | # next page 53 | match = re.search(r'page/(\d+)', response.url) 54 | if not match: 55 | return 56 | page = int(match.group(1)) + 1 57 | next_url = f'{self.base_url}/page/{page}' 58 | yield PyppeteerRequest(next_url, callback=self.parse_index, wait_for='.item .name') 59 | 60 | def parse_detail(self, response): 61 | """ 62 | process detail info of book 63 | 
:param response: 64 | :return: 65 | """ 66 | name = response.css('.name::text').extract_first() 67 | tags = response.css('.tags button span::text').extract() 68 | score = response.css('.score::text').extract_first() 69 | tags = [tag.strip() for tag in tags] if tags else [] 70 | score = score.strip() if score else None 71 | yield BookItem(name=name, tags=tags, score=score) 72 | -------------------------------------------------------------------------------- /example/example/spiders/movie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from gerapy_pyppeteer import PyppeteerRequest 4 | from scrapy import Request, signals 5 | from example.items import MovieItem 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class MovieSpider(scrapy.Spider): 12 | name = 'movie' 13 | allowed_domains = ['antispider1.scrape.center'] 14 | base_url = 'https://antispider1.scrape.center' 15 | max_page = 10 16 | custom_settings = { 17 | 'GERAPY_PYPPETEER_PRETEND': False 18 | } 19 | 20 | def start_requests(self): 21 | """ 22 | first page 23 | :return: 24 | """ 25 | for page in range(1, self.max_page + 1): 26 | url = f'{self.base_url}/page/{page}' 27 | logger.debug('start url %s', url) 28 | cookies = { 29 | 'name': 'germey' 30 | } 31 | yield PyppeteerRequest(url, callback=self.parse_index, priority=10, wait_for='.item', pretend=True, cookies=cookies) 32 | 33 | def parse_index(self, response): 34 | """ 35 | extract movies 36 | :param response: 37 | :return: 38 | """ 39 | items = response.css('.item') 40 | for item in items: 41 | href = item.css('a::attr(href)').extract_first() 42 | detail_url = response.urljoin(href) 43 | logger.info('detail url %s', detail_url) 44 | yield PyppeteerRequest(detail_url, callback=self.parse_detail, wait_for='.item') 45 | 46 | def parse_detail(self, response): 47 | """ 48 | process detail info of book 49 | :param response: 50 | :return: 51 | """ 52 | 
name = response.css('h2::text').extract_first() 53 | categories = response.css('.categories button span::text').extract() 54 | score = response.css('.score::text').extract_first() 55 | categories = [category.strip() 56 | for category in categories] if categories else [] 57 | score = score.strip() if score else None 58 | yield MovieItem(name=name, categories=categories, score=score) 59 | -------------------------------------------------------------------------------- /example/example/spiders/pretend_test.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | from gerapy_pyppeteer import PyppeteerRequest 4 | 5 | 6 | class PretendTestSpider(scrapy.Spider): 7 | name = 'pretend_test' 8 | custom_settings = { 9 | # change your local chrome path 10 | 'GERAPY_PYPPETEER_EXECUTABLE_PATH': '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 11 | 'GERAPY_PYPPETEER_PRETEND': True 12 | } 13 | 14 | def start_requests(self): 15 | url = 'https://bot.sannysoft.com/' 16 | yield PyppeteerRequest(url=url, callback=self.parse_index, pretend=True, screenshot=False) 17 | 18 | def parse_index(self, response): 19 | import_test_name = response.xpath("//th[contains(text(), 'Test Name')]/../following-sibling::tr") 20 | import_test_result = f"\ntest_name\tresult_class\tresult\n" 21 | for i in import_test_name: 22 | test_name = i.xpath("string(./td[1])").get('') 23 | result_class = i.xpath("./td[2]/@class").re_first('passed|failed', '未知结果') 24 | result = i.xpath("./td[2]/text()").get('') 25 | import_test_result += f"{test_name}\t{result_class}\t{result}\n" 26 | self.logger.info(import_test_result) 27 | -------------------------------------------------------------------------------- /example/example/spiders/sports.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from gerapy_pyppeteer import PyppeteerRequest 3 | 4 | 5 | class SportsSpider(scrapy.Spider): 6 | name = 'sports' 
7 | allowed_domains = ['sports.qq.com'] 8 | start_urls = ['http://sports.qq.com/'] 9 | 10 | def start_requests(self): 11 | for url in self.start_urls: 12 | yield PyppeteerRequest(url, callback=self.parse_index, pretend=False) 13 | 14 | def parse_index(self, response): 15 | pass 16 | -------------------------------------------------------------------------------- /example/example/spiders/test_proxy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from gerapy_pyppeteer import PyppeteerRequest 4 | from scrapy import Request, signals 5 | from example.items import MovieItem 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class ProxySpider(scrapy.Spider): 12 | name = 'proxy' 13 | allowed_domains = ['www.httpbin.org'] 14 | base_url = 'https://www.httpbin.org/get' 15 | max_page = 10 16 | custom_settings = { 17 | 'GERAPY_PYPPETEER_PROXY': 'http://tps254.kdlapi.com:15818', 18 | 'GERAPY_PYPPETEER_PROXY_CREDENTIAL': { 19 | 'username': '', 20 | 'password': '' 21 | } 22 | } 23 | 24 | def start_requests(self): 25 | """ 26 | first page 27 | :return: 28 | """ 29 | yield PyppeteerRequest(self.base_url, 30 | callback=self.parse_index, 31 | priority=10, 32 | proxy='http://tps254.kdlapi.com:15818', 33 | proxy_credential={ 34 | 'username': '', 35 | 'password': '' 36 | }) 37 | 38 | def parse_index(self, response): 39 | """ 40 | extract movies 41 | :param response: 42 | :return: 43 | """ 44 | print(response.text) 45 | -------------------------------------------------------------------------------- /example/requirements.txt: -------------------------------------------------------------------------------- 1 | gerapy-pyppeteer -------------------------------------------------------------------------------- /example/run.py: -------------------------------------------------------------------------------- 1 | from scrapy.cmdline import execute 2 | 3 | execute('scrapy crawl 
movie'.split()) 4 | -------------------------------------------------------------------------------- /example/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html 5 | 6 | [settings] 7 | default = example.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = example 12 | -------------------------------------------------------------------------------- /gerapy_pyppeteer/__init__.py: -------------------------------------------------------------------------------- 1 | from .downloadermiddlewares import PyppeteerMiddleware 2 | from .request import PyppeteerRequest 3 | -------------------------------------------------------------------------------- /gerapy_pyppeteer/__version__.py: -------------------------------------------------------------------------------- 1 | VERSION = (0, 2, '4') 2 | 3 | version = __version__ = '.'.join(map(str, VERSION)) 4 | -------------------------------------------------------------------------------- /gerapy_pyppeteer/downloadermiddlewares.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import sys 3 | import urllib.parse 4 | from io import BytesIO 5 | 6 | import twisted.internet 7 | from pyppeteer import launch 8 | from pyppeteer.errors import NetworkError, PageError, TimeoutError 9 | from scrapy.http import HtmlResponse 10 | from scrapy.utils.python import global_object_name 11 | from twisted.internet.asyncioreactor import AsyncioSelectorReactor 12 | from twisted.internet.defer import Deferred 13 | 14 | from gerapy_pyppeteer.pretend import SCRIPTS as PRETEND_SCRIPTS 15 | from gerapy_pyppeteer.settings import * 16 | 17 | if sys.platform == 'win32': 18 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) 19 | 20 | reactor = 
AsyncioSelectorReactor(asyncio.get_event_loop()) 21 | 22 | # install AsyncioSelectorReactor 23 | twisted.internet.reactor = reactor 24 | sys.modules['twisted.internet.reactor'] = reactor 25 | 26 | 27 | def as_deferred(f): 28 | """ 29 | schedule a coroutine (or asyncio Future) on the event loop and wrap it in a Twisted Deferred 30 | :param f: coroutine object or Future, i.e. the awaitable itself rather than the async function 31 | """ 32 | return Deferred.fromFuture(asyncio.ensure_future(f)) 33 | 34 | 35 | logger = logging.getLogger('gerapy.pyppeteer') 36 | 37 | 38 | class PyppeteerMiddleware(object): 39 | """ 40 | Downloader middleware handling the requests with Pyppeteer 41 | """ 42 | 43 | def _retry(self, request, reason, spider): 44 | """ 45 | build the retry copy of a failed request and record retry stats 46 | :param request: request that failed 47 | :param reason: failure reason, an exception instance or a plain value such as a status code 48 | :param spider: spider whose crawler stats are updated 49 | :return: copied request with bumped retry_times, or None when retry is disabled or exhausted 50 | """ 51 | if not self.retry_enabled: 52 | return 53 | 54 | retries = request.meta.get('retry_times', 0) + 1 55 | retry_times = self.max_retry_times 56 | 57 | if 'max_retry_times' in request.meta: 58 | retry_times = request.meta['max_retry_times'] 59 | 60 | stats = spider.crawler.stats 61 | if retries <= retry_times: 62 | logger.debug("Retrying %(request)s (failed %(retries)d times): %(reason)s", 63 | {'request': request, 'retries': retries, 'reason': reason}, 64 | extra={'spider': spider}) 65 | retryreq = request.copy() 66 | retryreq.meta['retry_times'] = retries 67 | retryreq.dont_filter = True 68 | retryreq.priority = request.priority + self.priority_adjust 69 | 70 | if isinstance(reason, Exception): 71 | reason = global_object_name(reason.__class__) 72 | 73 | stats.inc_value('retry/count') 74 | stats.inc_value('retry/reason_count/%s' % reason) 75 | return retryreq 76 | else: 77 | stats.inc_value('retry/max_reached') 78 | logger.error("Gave up retrying %(request)s (failed %(retries)d times): %(reason)s", 79 | {'request': request, 'retries': retries, 'reason': reason}, 80 | extra={'spider': spider}) 81 | 82 | @classmethod 83 | def from_crawler(cls, crawler): 84 | """ 85 | read the middleware settings from the crawler onto the class and build the middleware 86 | :param crawler: Scrapy crawler whose settings are read 87 | :return: middleware instance 88 | """ 89 
| settings = crawler.settings 90 | logging_level = settings.get( 91 | 'GERAPY_PYPPETEER_LOGGING_LEVEL', GERAPY_PYPPETEER_LOGGING_LEVEL) 92 | logging.getLogger('websockets').setLevel(logging_level) 93 | logging.getLogger('pyppeteer').setLevel(logging_level) 94 | 95 | # init settings 96 | cls.window_width = settings.get( 97 | 'GERAPY_PYPPETEER_WINDOW_WIDTH', GERAPY_PYPPETEER_WINDOW_WIDTH) 98 | cls.window_height = settings.get( 99 | 'GERAPY_PYPPETEER_WINDOW_HEIGHT', GERAPY_PYPPETEER_WINDOW_HEIGHT) 100 | cls.default_user_agent = settings.get('GERAPY_PYPPETEER_DEFAULT_USER_AGENT', 101 | GERAPY_PYPPETEER_DEFAULT_USER_AGENT) 102 | cls.headless = settings.get( 103 | 'GERAPY_PYPPETEER_HEADLESS', GERAPY_PYPPETEER_HEADLESS) 104 | cls.dumpio = settings.get( 105 | 'GERAPY_PYPPETEER_DUMPIO', GERAPY_PYPPETEER_DUMPIO) 106 | cls.ignore_https_errors = settings.get('GERAPY_PYPPETEER_IGNORE_HTTPS_ERRORS', 107 | GERAPY_PYPPETEER_IGNORE_HTTPS_ERRORS) 108 | cls.slow_mo = settings.get( 109 | 'GERAPY_PYPPETEER_SLOW_MO', GERAPY_PYPPETEER_SLOW_MO) 110 | cls.ignore_default_args = settings.get('GERAPY_PYPPETEER_IGNORE_DEFAULT_ARGS', 111 | GERAPY_PYPPETEER_IGNORE_DEFAULT_ARGS) 112 | cls.handle_sigint = settings.get( 113 | 'GERAPY_PYPPETEER_HANDLE_SIGINT', GERAPY_PYPPETEER_HANDLE_SIGINT) 114 | cls.handle_sigterm = settings.get( 115 | 'GERAPY_PYPPETEER_HANDLE_SIGTERM', GERAPY_PYPPETEER_HANDLE_SIGTERM) 116 | cls.handle_sighup = settings.get( 117 | 'GERAPY_PYPPETEER_HANDLE_SIGHUP', GERAPY_PYPPETEER_HANDLE_SIGHUP) 118 | cls.auto_close = settings.get( 119 | 'GERAPY_PYPPETEER_AUTO_CLOSE', GERAPY_PYPPETEER_AUTO_CLOSE) 120 | cls.devtools = settings.get( 121 | 'GERAPY_PYPPETEER_DEVTOOLS', GERAPY_PYPPETEER_DEVTOOLS) 122 | cls.executable_path = settings.get( 123 | 'GERAPY_PYPPETEER_EXECUTABLE_PATH', GERAPY_PYPPETEER_EXECUTABLE_PATH) 124 | cls.disable_extensions = settings.get('GERAPY_PYPPETEER_DISABLE_EXTENSIONS', 125 | GERAPY_PYPPETEER_DISABLE_EXTENSIONS) 126 | cls.hide_scrollbars = settings.get( 127 | 
'GERAPY_PYPPETEER_HIDE_SCROLLBARS', GERAPY_PYPPETEER_HIDE_SCROLLBARS) 128 | cls.mute_audio = settings.get( 129 | 'GERAPY_PYPPETEER_MUTE_AUDIO', GERAPY_PYPPETEER_MUTE_AUDIO) 130 | cls.no_sandbox = settings.get( 131 | 'GERAPY_PYPPETEER_NO_SANDBOX', GERAPY_PYPPETEER_NO_SANDBOX) 132 | cls.disable_setuid_sandbox = settings.get('GERAPY_PYPPETEER_DISABLE_SETUID_SANDBOX', 133 | GERAPY_PYPPETEER_DISABLE_SETUID_SANDBOX) 134 | cls.disable_gpu = settings.get( 135 | 'GERAPY_PYPPETEER_DISABLE_GPU', GERAPY_PYPPETEER_DISABLE_GPU) 136 | cls.download_timeout = settings.get('GERAPY_PYPPETEER_DOWNLOAD_TIMEOUT', 137 | settings.get('DOWNLOAD_TIMEOUT', GERAPY_PYPPETEER_DOWNLOAD_TIMEOUT)) 138 | cls.ignore_resource_types = settings.get('GERAPY_PYPPETEER_IGNORE_RESOURCE_TYPES', 139 | GERAPY_PYPPETEER_IGNORE_RESOURCE_TYPES) 140 | cls.screenshot = settings.get( 141 | 'GERAPY_PYPPETEER_SCREENSHOT', GERAPY_PYPPETEER_SCREENSHOT) 142 | cls.pretend = settings.get( 143 | 'GERAPY_PYPPETEER_PRETEND', GERAPY_PYPPETEER_PRETEND) 144 | cls.sleep = settings.get( 145 | 'GERAPY_PYPPETEER_SLEEP', GERAPY_PYPPETEER_SLEEP) 146 | cls.enable_request_interception = settings.getbool('GERAPY_ENABLE_REQUEST_INTERCEPTION', 147 | GERAPY_ENABLE_REQUEST_INTERCEPTION) 148 | cls.retry_enabled = settings.getbool('RETRY_ENABLED') 149 | cls.max_retry_times = settings.getint('RETRY_TIMES') 150 | cls.retry_http_codes = set(int(x) 151 | for x in settings.getlist('RETRY_HTTP_CODES')) 152 | cls.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST') 153 | cls.proxy = settings.get('GERAPY_PYPPETEER_PROXY') 154 | cls.proxy_credential = settings.get( 155 | 'GERAPY_PYPPETEER_PROXY_CREDENTIAL') 156 | return cls() 157 | 158 | async def _process_request(self, request, spider): 159 | """ 160 | use pyppeteer to process spider 161 | :param request: 162 | :param spider: 163 | :return: 164 | """ 165 | # get pyppeteer meta 166 | pyppeteer_meta = request.meta.get('pyppeteer') or {} 167 | logger.debug('pyppeteer_meta %s', pyppeteer_meta) 168 
| if not isinstance(pyppeteer_meta, dict) or len(pyppeteer_meta.keys()) == 0: 169 | return 170 | 171 | options = { 172 | 'headless': self.headless, 173 | 'dumpio': self.dumpio, 174 | 'devtools': self.devtools, 175 | 'args': [ 176 | f'--window-size={self.window_width},{self.window_height}', 177 | ] 178 | } 179 | if self.executable_path: 180 | options['executablePath'] = self.executable_path 181 | if self.ignore_https_errors: 182 | options['ignoreHTTPSErrors'] = self.ignore_https_errors 183 | if self.slow_mo: 184 | options['slowMo'] = self.slow_mo 185 | if self.ignore_default_args: 186 | options['ignoreDefaultArgs'] = self.ignore_default_args 187 | if self.handle_sigint: 188 | options['handleSIGINT'] = self.handle_sigint 189 | if self.handle_sigterm: 190 | options['handleSIGTERM'] = self.handle_sigterm 191 | if self.handle_sighup: 192 | options['handleSIGHUP'] = self.handle_sighup 193 | if self.auto_close: 194 | options['autoClose'] = self.auto_close 195 | if self.disable_extensions: 196 | options['args'].append('--disable-extensions') 197 | if self.hide_scrollbars: 198 | options['args'].append('--hide-scrollbars') 199 | if self.mute_audio: 200 | options['args'].append('--mute-audio') 201 | if self.no_sandbox: 202 | options['args'].append('--no-sandbox') 203 | if self.disable_setuid_sandbox: 204 | options['args'].append('--disable-setuid-sandbox') 205 | if self.disable_gpu: 206 | options['args'].append('--disable-gpu') 207 | 208 | # pretend as normal browser 209 | _pretend = self.pretend # get global pretend setting 210 | if pyppeteer_meta.get('pretend') is not None: 211 | # get local pretend setting to overwrite global 212 | _pretend = pyppeteer_meta.get('pretend') 213 | if _pretend: 214 | options['ignoreDefaultArgs'] = [ 215 | '--enable-automation' 216 | ] 217 | options['args'].append( 218 | '--disable-blink-features=AutomationControlled') 219 | 220 | # set proxy 221 | _proxy = self.proxy 222 | if pyppeteer_meta.get('proxy') is not None: 223 | _proxy = 
pyppeteer_meta.get('proxy') 224 | if _proxy: 225 | options['args'].append(f'--proxy-server={_proxy}') 226 | logger.debug('set options %s', options) 227 | 228 | browser = await launch(options) 229 | page = None 230 | 231 | try: 232 | page = await browser.newPage() 233 | except NetworkError: 234 | logger.error( 235 | 'network error occurred while launching pyppeteer page') 236 | await page.close() 237 | await browser.close() 238 | return self._retry(request, 504, spider) 239 | 240 | # set proxy auth credential, see more from 241 | # https://pyppeteer.github.io/pyppeteer/reference.html?highlight=auth#pyppeteer.page.Page.authenticate 242 | _proxy_credential = self.proxy_credential 243 | if pyppeteer_meta.get('proxy_credential') is not None: 244 | _proxy_credential = pyppeteer_meta.get('proxy_credential') 245 | if _proxy_credential: 246 | await page.authenticate(_proxy_credential) 247 | 248 | await page.setViewport({'width': self.window_width, 'height': self.window_height}) 249 | 250 | if _pretend: 251 | _default_user_agent = self.default_user_agent 252 | # get Scrapy request ua, exclude default('Scrapy/2.5.0 (+https://scrapy.org)') 253 | if 'Scrapy' not in request.headers.get('User-Agent').decode(): 254 | _default_user_agent = request.headers.get( 255 | 'User-Agent').decode() 256 | await page.setUserAgent(_default_user_agent) 257 | logger.debug('PRETEND_SCRIPTS is run') 258 | for script in PRETEND_SCRIPTS: 259 | await page.evaluateOnNewDocument(script) 260 | 261 | # set cookies 262 | parse_result = urllib.parse.urlsplit(request.url) 263 | domain = parse_result.hostname 264 | _cookies = [] 265 | if isinstance(request.cookies, dict): 266 | _cookies = [{'name': k, 'value': v, 'domain': domain} 267 | for k, v in request.cookies.items()] 268 | else: 269 | for _cookie in _cookies: 270 | if isinstance(_cookie, dict) and 'domain' not in _cookie.keys(): 271 | _cookie['domain'] = domain 272 | await page.setCookie(*_cookies) 273 | 274 | # the headers must be set using request 
interception 275 | await page.setRequestInterception(self.enable_request_interception) 276 | 277 | if self.enable_request_interception: 278 | @page.on('request') 279 | async def _handle_interception(pu_request): 280 | # handle headers 281 | overrides = { 282 | 'headers': pu_request.headers 283 | } 284 | # handle resource types 285 | _ignore_resource_types = self.ignore_resource_types 286 | if request.meta.get('pyppeteer', {}).get('ignore_resource_types') is not None: 287 | _ignore_resource_types = request.meta.get( 288 | 'pyppeteer', {}).get('ignore_resource_types') 289 | if pu_request.resourceType in _ignore_resource_types: 290 | await pu_request.abort() 291 | else: 292 | await pu_request.continue_(overrides) 293 | 294 | _timeout = self.download_timeout 295 | if pyppeteer_meta.get('timeout') is not None: 296 | _timeout = pyppeteer_meta.get('timeout') 297 | 298 | logger.debug('crawling %s', request.url) 299 | 300 | response = None 301 | try: 302 | options = { 303 | 'timeout': 1000 * _timeout 304 | } 305 | if pyppeteer_meta.get('wait_until'): 306 | options['waitUntil'] = pyppeteer_meta.get('wait_until') 307 | logger.debug('request %s with options %s', request.url, options) 308 | response = await page.goto( 309 | request.url, 310 | options=options 311 | ) 312 | except (PageError, TimeoutError): 313 | logger.exception( 314 | 'error rendering url %s using pyppeteer', request.url, exc_info=True) 315 | await page.close() 316 | await browser.close() 317 | return self._retry(request, 504, spider) 318 | 319 | # wait for dom loaded 320 | if pyppeteer_meta.get('wait_for'): 321 | _wait_for = pyppeteer_meta.get('wait_for') 322 | try: 323 | logger.debug('waiting for %s', _wait_for) 324 | if isinstance(_wait_for, dict): 325 | await page.waitFor(**_wait_for) 326 | else: 327 | await page.waitFor(_wait_for) 328 | except TimeoutError: 329 | logger.exception('error waiting for %s of %s', 330 | _wait_for, request.url, exc_info=True) 331 | await page.close() 332 | await browser.close() 
333 | return self._retry(request, 504, spider) 334 | 335 | _actions_result = None 336 | # evaluate actions 337 | if pyppeteer_meta.get('actions'): 338 | _actions = pyppeteer_meta.get('actions') 339 | logger.debug('evaluating %s', _actions) 340 | _actions_result = await _actions(page) 341 | 342 | _script_result = None 343 | # evaluate script 344 | if pyppeteer_meta.get('script'): 345 | _script = pyppeteer_meta.get('script') 346 | logger.debug('evaluating %s', _script) 347 | _script_result = await page.evaluate(_script) 348 | 349 | # sleep 350 | _sleep = self.sleep 351 | if pyppeteer_meta.get('sleep') is not None: 352 | _sleep = pyppeteer_meta.get('sleep') 353 | if _sleep is not None: 354 | logger.debug('sleep for %ss', _sleep) 355 | await asyncio.sleep(_sleep) 356 | 357 | content = await page.content() 358 | body = str.encode(content) 359 | 360 | # screenshot 361 | # TODO: maybe add support for `enabled` sub attribute 362 | _screenshot = self.screenshot 363 | if pyppeteer_meta.get('screenshot') is not None: 364 | _screenshot = pyppeteer_meta.get('screenshot') 365 | screenshot = None 366 | if _screenshot: 367 | # pop path to not save img directly in this middleware 368 | if isinstance(_screenshot, dict) and 'path' in _screenshot.keys(): 369 | _screenshot.pop('path') 370 | logger.debug('taking screenshot using args %s', _screenshot) 371 | screenshot = await page.screenshot(_screenshot) 372 | if isinstance(screenshot, bytes): 373 | screenshot = BytesIO(screenshot) 374 | 375 | # close page and browser 376 | logger.debug('close pyppeteer') 377 | await page.close() 378 | await browser.close() 379 | 380 | if not response: 381 | logger.error( 382 | 'get null response by pyppeteer of url %s', request.url) 383 | 384 | # Necessary to bypass the compression middleware (?) 
385 | response.headers.pop('content-encoding', None) 386 | response.headers.pop('Content-Encoding', None) 387 | 388 | response = HtmlResponse( 389 | page.url, 390 | status=response.status, 391 | headers=response.headers, 392 | body=body, 393 | encoding='utf-8', 394 | request=request 395 | ) 396 | if _script_result: 397 | response.meta['script_result'] = _script_result 398 | if _actions_result: 399 | response.meta['actions_result'] = _actions_result 400 | if screenshot: 401 | response.meta['screenshot'] = screenshot 402 | return response 403 | 404 | def process_request(self, request, spider): 405 | """ 406 | process request using pyppeteer 407 | :param request: 408 | :param spider: 409 | :return: 410 | """ 411 | logger.debug('processing request %s', request) 412 | return as_deferred(self._process_request(request, spider)) 413 | 414 | async def _spider_closed(self): 415 | pass 416 | 417 | def spider_closed(self): 418 | """ 419 | callback when spider closed 420 | :return: 421 | """ 422 | return as_deferred(self._spider_closed()) 423 | -------------------------------------------------------------------------------- /gerapy_pyppeteer/pretend.py: -------------------------------------------------------------------------------- 1 | SET_USER_AGENT = '''() => {Object.defineProperty(navigator, 'userAgent', {get: () => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'})}''' 2 | SET_APP_VERSION = '''() => {Object.defineProperty(navigator, 'appVersion', {get: () => '5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'})}''' 3 | EXTEND_LANGUAGES = '''() => {Object.defineProperty(navigator, 'languages', {get: () => ['zh-CN', 'zh', 'en', 'zh-TW', 'ja']})}''' 4 | EXTEND_PLUGINS = '''() => {Object.defineProperty(navigator, 'plugins', {get: () => [0, 1, 2, 3, 4]})}''' 5 | EXTEND_MIME_TYPES = '''() => {Object.defineProperty(navigator, 'mimeTypes', 
{get: () => [0, 1, 2, 3, 4]})}''' 6 | SET_WEBGL = '''() => { 7 | const getParameter = WebGLRenderingContext.getParameter 8 | WebGLRenderingContext.prototype.getParameter = (parameter) => { 9 | if (parameter === 37445) { 10 | return 'Intel Open Source Technology Center' 11 | } 12 | if (parameter === 37446) { 13 | return 'Mesa DRI Intel(R) Ivybridge Mobile ' 14 | } 15 | return getParameter(parameter) 16 | } 17 | } 18 | ''' 19 | SET_CHROME_INFO = '''() => { 20 | Object.defineProperty(window, 'chrome', { 21 | "app": { 22 | "isInstalled": false, 23 | "InstallState": {"DISABLED": "disabled", "INSTALLED": "installed", "NOT_INSTALLED": "not_installed"}, 24 | "RunningState": {"CANNOT_RUN": "cannot_run", "READY_TO_RUN": "ready_to_run", "RUNNING": "running"} 25 | }, 26 | "runtime": { 27 | "OnInstalledReason": { 28 | "CHROME_UPDATE": "chrome_update", 29 | "INSTALL": "install", 30 | "SHARED_MODULE_UPDATE": "shared_module_update", 31 | "UPDATE": "update" 32 | }, 33 | "OnRestartRequiredReason": {"APP_UPDATE": "app_update", "OS_UPDATE": "os_update", "PERIODIC": "periodic"}, 34 | "PlatformArch": { 35 | "ARM": "arm", 36 | "ARM64": "arm64", 37 | "MIPS": "mips", 38 | "MIPS64": "mips64", 39 | "X86_32": "x86-32", 40 | "X86_64": "x86-64" 41 | }, 42 | "PlatformNaclArch": {"ARM": "arm", "MIPS": "mips", "MIPS64": "mips64", "X86_32": "x86-32", "X86_64": "x86-64"}, 43 | "PlatformOs": { 44 | "ANDROID": "android", 45 | "CROS": "cros", 46 | "LINUX": "linux", 47 | "MAC": "mac", 48 | "OPENBSD": "openbsd", 49 | "WIN": "win" 50 | }, 51 | "RequestUpdateCheckStatus": { 52 | "NO_UPDATE": "no_update", 53 | "THROTTLED": "throttled", 54 | "UPDATE_AVAILABLE": "update_available" 55 | } 56 | } 57 | }) 58 | } 59 | ''' 60 | 61 | SET_PERMISSION = '''() => { 62 | const originalQuery = window.navigator.permissions.query; 63 | return window.navigator.permissions.query = (parameters) => ( 64 | parameters.name === 'notifications' ? 
65 | Promise.resolve({ state: Notification.permission }) : 66 | originalQuery(parameters) 67 | ) 68 | } 69 | ''' 70 | 71 | # thanks kingname, https://mp.weixin.qq.com/s/Bge-_yiatSq4CQq7fRvjdQ 72 | # SET_WEBDRIVER = '''() => {Object.defineProperty(navigator, 'webdriver', {get: () => undefined})}''' 73 | SET_WEBDRIVER = '''/*! 74 | * Note: Auto-generated, do not update manually. 75 | * Generated by: https://github.com/berstend/puppeteer-extra/tree/master/packages/extract-stealth-evasions 76 | * Generated on: Sat, 28 Nov 2020 07:21:38 GMT 77 | * License: MIT 78 | */ 79 | var opts;(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. 
Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. 
value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:'utils => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, \'chrome\')`\n Object.defineProperty(window, \'chrome\', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We\'ll extend that later\n })\n }\n\n // That means we\'re running headful and don\'t need to mock anything\n if (\'app\' in window.chrome) {\n return // Nothing to do here\n }\n\n const makeError = {\n ErrorInInvocation: fn => {\n const err = new TypeError(`Error in invocation of app.${fn}()`)\n return utils.stripErrorWithAnchor(\n err,\n `at ${fn} (eval at `\n )\n }\n }\n\n // There\'s a some static data in that property which doesn\'t seem to change,\n // we should periodically check for updates: `JSON.stringify(window.app, null, 2)`\n const STATIC_DATA = JSON.parse(\n `\n{\n "isInstalled": false,\n 
"InstallState": {\n "DISABLED": "disabled",\n "INSTALLED": "installed",\n "NOT_INSTALLED": "not_installed"\n },\n "RunningState": {\n "CANNOT_RUN": "cannot_run",\n "READY_TO_RUN": "ready_to_run",\n "RUNNING": "running"\n }\n}\n `.trim()\n )\n\n window.chrome.app = {\n ...STATIC_DATA,\n\n get isInstalled() {\n return false\n },\n\n getDetails: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`getDetails`)\n }\n return null\n },\n getIsInstalled: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`getIsInstalled`)\n }\n return false\n },\n runningState: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`runningState`)\n }\n return \'cannot_run\'\n }\n }\n utils.patchToStringNested(window.chrome.app)\n }',_args:[]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If 
the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:"utils => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, 'chrome', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n })\n }\n\n // That means we're running headful and don't need to mock anything\n if ('csi' in window.chrome) {\n return // Nothing to do here\n }\n\n // Check that the Navigation Timing API v1 is available, we need that\n if (!window.performance || !window.performance.timing) {\n return\n }\n\n const { timing } = window.performance\n\n window.chrome.csi = function() {\n return {\n onloadT: timing.domContentLoadedEventEnd,\n startE: timing.navigationStart,\n pageT: Date.now() - timing.navigationStart,\n tran: 15 // Transition type or something\n }\n }\n 
utils.patchToString(window.chrome.csi)\n }",_args:[]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. 
[as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. 
value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:"(utils, { opts }) => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, 'chrome', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n })\n }\n\n // That means we're running headful and don't need to mock anything\n if ('loadTimes' in window.chrome) {\n return // Nothing to do here\n }\n\n // Check that the Navigation Timing API v1 + v2 is available, we need that\n if (\n !window.performance ||\n !window.performance.timing ||\n !window.PerformancePaintTiming\n ) {\n return\n }\n\n const { performance } = window\n\n // Some stuff is not available on about:blank as it requires a navigation to occur,\n // let's harden the code to not fail then:\n const ntEntryFallback = {\n nextHopProtocol: 'h2',\n type: 
'other'\n }\n\n // The API exposes some funky info regarding the connection\n const protocolInfo = {\n get connectionInfo() {\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ntEntry.nextHopProtocol\n },\n get npnNegotiatedProtocol() {\n // NPN is deprecated in favor of ALPN, but this implementation returns the\n // HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ['h2', 'hq'].includes(ntEntry.nextHopProtocol)\n ? ntEntry.nextHopProtocol\n : 'unknown'\n },\n get navigationType() {\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ntEntry.type\n },\n get wasAlternateProtocolAvailable() {\n // The Alternate-Protocol header is deprecated in favor of Alt-Svc\n // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this\n // should always return false.\n return false\n },\n get wasFetchedViaSpdy() {\n // SPDY is deprecated in favor of HTTP/2, but this implementation returns\n // true for HTTP/2 or HTTP2+QUIC/39 as well.\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ['h2', 'hq'].includes(ntEntry.nextHopProtocol)\n },\n get wasNpnNegotiated() {\n // NPN is deprecated in favor of ALPN, but this implementation returns true\n // for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ['h2', 'hq'].includes(ntEntry.nextHopProtocol)\n }\n }\n\n const { timing } = window.performance\n\n // Truncate number to specific number of decimals, most of the `loadTimes` stuff has 3\n function toFixed(num, fixed) {\n var re = new RegExp('^-?\\\\d+(?:.\\\\d{0,' + (fixed || -1) + '})?')\n return num.toString().match(re)[0]\n }\n\n const timingInfo = {\n get firstPaintAfterLoadTime() {\n // This was never actually implemented and always returns 0.\n return 
0\n },\n get requestTime() {\n return timing.navigationStart / 1000\n },\n get startLoadTime() {\n return timing.navigationStart / 1000\n },\n get commitLoadTime() {\n return timing.responseStart / 1000\n },\n get finishDocumentLoadTime() {\n return timing.domContentLoadedEventEnd / 1000\n },\n get finishLoadTime() {\n return timing.loadEventEnd / 1000\n },\n get firstPaintTime() {\n const fpEntry = performance.getEntriesByType('paint')[0] || {\n startTime: timing.loadEventEnd / 1000 // Fallback if no navigation occured (`about:blank`)\n }\n return toFixed(\n (fpEntry.startTime + performance.timeOrigin) / 1000,\n 3\n )\n }\n }\n\n window.chrome.loadTimes = function() {\n return {\n ...protocolInfo,\n ...timingInfo\n }\n }\n utils.patchToString(window.chrome.loadTimes)\n }",_args:[{opts:{}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be 
found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:"(utils, { opts, STATIC_DATA }) => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, 'chrome', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n })\n }\n\n // That means we're running headful and don't need to mock anything\n const existsAlready = 'runtime' in window.chrome\n // `chrome.runtime` is only exposed on secure origins\n const isNotSecure = !window.location.protocol.startsWith('https')\n if (existsAlready || (isNotSecure && !opts.runOnInsecureOrigins)) {\n return // Nothing to do here\n }\n\n window.chrome.runtime = {\n // There's a bunch of static data in that property which doesn't seem to change,\n // we should periodically check for updates: `JSON.stringify(window.chrome.runtime, null, 
2)`\n ...STATIC_DATA,\n // `chrome.runtime.id` is extension related and returns undefined in Chrome\n get id() {\n return undefined\n },\n // These two require more sophisticated mocks\n connect: null,\n sendMessage: null\n }\n\n const makeCustomRuntimeErrors = (preamble, method, extensionId) => ({\n NoMatchingSignature: new TypeError(\n preamble + `No matching signature.`\n ),\n MustSpecifyExtensionID: new TypeError(\n preamble +\n `${method} called from a webpage must specify an Extension ID (string) for its first argument.`\n ),\n InvalidExtensionID: new TypeError(\n preamble + `Invalid extension id: '${extensionId}'`\n )\n })\n\n // Valid Extension IDs are 32 characters in length and use the letter `a` to `p`:\n // https://source.chromium.org/chromium/chromium/src/+/master:components/crx_file/id_util.cc;drc=14a055ccb17e8c8d5d437fe080faba4c6f07beac;l=90\n const isValidExtensionID = str =>\n str.length === 32 && str.toLowerCase().match(/^[a-p]+$/)\n\n /** Mock `chrome.runtime.sendMessage` */\n const sendMessageHandler = {\n apply: function(target, ctx, args) {\n const [extensionId, options, responseCallback] = args || []\n\n // Define custom errors\n const errorPreamble = `Error in invocation of runtime.sendMessage(optional string extensionId, any message, optional object options, optional function responseCallback): `\n const Errors = makeCustomRuntimeErrors(\n errorPreamble,\n `chrome.runtime.sendMessage()`,\n extensionId\n )\n\n // Check if the call signature looks ok\n const noArguments = args.length === 0\n const tooManyArguments = args.length > 4\n const incorrectOptions = options && typeof options !== 'object'\n const incorrectResponseCallback =\n responseCallback && typeof responseCallback !== 'function'\n if (\n noArguments ||\n tooManyArguments ||\n incorrectOptions ||\n incorrectResponseCallback\n ) {\n throw Errors.NoMatchingSignature\n }\n\n // At least 2 arguments are required before we even validate the extension ID\n if (args.length < 2) {\n throw 
Errors.MustSpecifyExtensionID\n }\n\n // Now let's make sure we got a string as extension ID\n if (typeof extensionId !== 'string') {\n throw Errors.NoMatchingSignature\n }\n\n if (!isValidExtensionID(extensionId)) {\n throw Errors.InvalidExtensionID\n }\n\n return undefined // Normal behavior\n }\n }\n utils.mockWithProxy(\n window.chrome.runtime,\n 'sendMessage',\n function sendMessage() {},\n sendMessageHandler\n )\n\n /**\n * Mock `chrome.runtime.connect`\n *\n * @see https://developer.chrome.com/apps/runtime#method-connect\n */\n const connectHandler = {\n apply: function(target, ctx, args) {\n const [extensionId, connectInfo] = args || []\n\n // Define custom errors\n const errorPreamble = `Error in invocation of runtime.connect(optional string extensionId, optional object connectInfo): `\n const Errors = makeCustomRuntimeErrors(\n errorPreamble,\n `chrome.runtime.connect()`,\n extensionId\n )\n\n // Behavior differs a bit from sendMessage:\n const noArguments = args.length === 0\n const emptyStringArgument = args.length === 1 && extensionId === ''\n if (noArguments || emptyStringArgument) {\n throw Errors.MustSpecifyExtensionID\n }\n\n const tooManyArguments = args.length > 2\n const incorrectConnectInfoType =\n connectInfo && typeof connectInfo !== 'object'\n\n if (tooManyArguments || incorrectConnectInfoType) {\n throw Errors.NoMatchingSignature\n }\n\n const extensionIdIsString = typeof extensionId === 'string'\n if (extensionIdIsString && extensionId === '') {\n throw Errors.MustSpecifyExtensionID\n }\n if (extensionIdIsString && !isValidExtensionID(extensionId)) {\n throw Errors.InvalidExtensionID\n }\n\n // There's another edge-case here: extensionId is optional so we might find a connectInfo object as first param, which we need to validate\n const validateConnectInfo = ci => {\n // More than a first param connectInfo as been provided\n if (args.length > 1) {\n throw Errors.NoMatchingSignature\n }\n // An empty connectInfo has been provided\n if 
(Object.keys(ci).length === 0) {\n throw Errors.MustSpecifyExtensionID\n }\n // Loop over all connectInfo props an check them\n Object.entries(ci).forEach(([k, v]) => {\n const isExpected = ['name', 'includeTlsChannelId'].includes(k)\n if (!isExpected) {\n throw new TypeError(\n errorPreamble + `Unexpected property: '${k}'.`\n )\n }\n const MismatchError = (propName, expected, found) =>\n TypeError(\n errorPreamble +\n `Error at property '${propName}': Invalid type: expected ${expected}, found ${found}.`\n )\n if (k === 'name' && typeof v !== 'string') {\n throw MismatchError(k, 'string', typeof v)\n }\n if (k === 'includeTlsChannelId' && typeof v !== 'boolean') {\n throw MismatchError(k, 'boolean', typeof v)\n }\n })\n }\n if (typeof extensionId === 'object') {\n validateConnectInfo(extensionId)\n throw Errors.MustSpecifyExtensionID\n }\n\n // Unfortunately even when the connect fails Chrome will return an object with methods we need to mock as well\n return utils.patchToStringNested(makeConnectResponse())\n }\n }\n utils.mockWithProxy(\n window.chrome.runtime,\n 'connect',\n function connect() {},\n connectHandler\n )\n\n function makeConnectResponse() {\n const onSomething = () => ({\n addListener: function addListener() {},\n dispatch: function dispatch() {},\n hasListener: function hasListener() {},\n hasListeners: function hasListeners() {\n return false\n },\n removeListener: function removeListener() {}\n })\n\n const response = {\n name: '',\n sender: undefined,\n disconnect: function disconnect() {},\n onDisconnect: onSomething(),\n onMessage: onSomething(),\n postMessage: function postMessage() {\n if (!arguments.length) {\n throw new TypeError(`Insufficient number of arguments.`)\n }\n throw new Error(`Attempting to use a disconnected port object`)\n }\n }\n return response\n }\n 
}",_args:[{opts:{runOnInsecureOrigins:!1},STATIC_DATA:{OnInstalledReason:{CHROME_UPDATE:"chrome_update",INSTALL:"install",SHARED_MODULE_UPDATE:"shared_module_update",UPDATE:"update"},OnRestartRequiredReason:{APP_UPDATE:"app_update",OS_UPDATE:"os_update",PERIODIC:"periodic"},PlatformArch:{ARM:"arm",ARM64:"arm64",MIPS:"mips",MIPS64:"mips64",X86_32:"x86-32",X86_64:"x86-64"},PlatformNaclArch:{ARM:"arm",MIPS:"mips",MIPS64:"mips64",X86_32:"x86-32",X86_64:"x86-64"},PlatformOs:{ANDROID:"android",CROS:"cros",LINUX:"linux",MAC:"mac",OPENBSD:"openbsd",WIN:"win"},RequestUpdateCheckStatus:{NO_UPDATE:"no_update",THROTTLED:"throttled",UPDATE_AVAILABLE:"update_available"}}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at 
Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:"utils => {\n /**\n * Input might look funky, we need to normalize it so e.g. 
whitespace isn't an issue for our spoofing.\n *\n * @example\n * video/webm; codecs=\"vp8, vorbis\"\n * video/mp4; codecs=\"avc1.42E01E\"\n * audio/x-m4a;\n * audio/ogg; codecs=\"vorbis\"\n * @param {String} arg\n */\n const parseInput = arg => {\n const [mime, codecStr] = arg.trim().split(';')\n let codecs = []\n if (codecStr && codecStr.includes('codecs=\"')) {\n codecs = codecStr\n .trim()\n .replace(`codecs=\"`, '')\n .replace(`\"`, '')\n .trim()\n .split(',')\n .filter(x => !!x)\n .map(x => x.trim())\n }\n return {\n mime,\n codecStr,\n codecs\n }\n }\n\n const canPlayType = {\n // Intercept certain requests\n apply: function(target, ctx, args) {\n if (!args || !args.length) {\n return target.apply(ctx, args)\n }\n const { mime, codecs } = parseInput(args[0])\n // This specific mp4 codec is missing in Chromium\n if (mime === 'video/mp4') {\n if (codecs.includes('avc1.42E01E')) {\n return 'probably'\n }\n }\n // This mimetype is only supported if no codecs are specified\n if (mime === 'audio/x-m4a' && !codecs.length) {\n return 'maybe'\n }\n\n // This mimetype is only supported if no codecs are specified\n if (mime === 'audio/aac' && !codecs.length) {\n return 'probably'\n }\n // Everything else as usual\n return target.apply(ctx, args)\n }\n }\n\n /* global HTMLMediaElement */\n utils.replaceWithProxy(\n HTMLMediaElement.prototype,\n 'canPlayType',\n canPlayType\n )\n }",_args:[]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to the defined proxy handler\n return 
handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:"(utils, opts) => {\n const patchNavigator = (name, value) =>\n utils.replaceProperty(Object.getPrototypeOf(navigator), name, {\n get() {\n return value\n }\n })\n\n patchNavigator('hardwareConcurrency', opts.hardwareConcurrency || 4)\n }",_args:[{}]}),opts={},Object.defineProperty(Object.getPrototypeOf(navigator),"languages",{get:()=>opts.languages||["en-US","en"]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to 
the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:"(utils, opts) => {\n const handler = {\n apply: function(target, ctx, args) {\n const param = (args || [])[0]\n\n if (param && param.name && param.name === 'notifications') {\n const result = { state: Notification.permission }\n Object.setPrototypeOf(result, PermissionStatus.prototype)\n return Promise.resolve(result)\n }\n\n return utils.cache.Reflect.apply(...arguments)\n }\n }\n\n utils.replaceWithProxy(\n window.navigator.permissions.__proto__, // eslint-disable-line no-proto\n 'query',\n handler\n )\n }",_args:[{}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack 
if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:"(utils, { fns, data }) => {\n fns = utils.materializeFns(fns)\n\n // That means we're running headful\n const hasPlugins = 'plugins' in navigator && navigator.plugins.length\n if (hasPlugins) {\n return // nothing to do here\n }\n\n const mimeTypes = fns.generateMimeTypeArray(utils, fns)(data.mimeTypes)\n const plugins = fns.generatePluginArray(utils, fns)(data.plugins)\n\n // Plugin and MimeType cross-reference each other, let's do that now\n // Note: We're looping through `data.plugins` here, not the generated `plugins`\n for (const pluginData of data.plugins) {\n pluginData.__mimeTypes.forEach((type, index) => {\n plugins[pluginData.name][index] = mimeTypes[type]\n\n Object.defineProperty(plugins[pluginData.name], type, {\n value: mimeTypes[type],\n writable: false,\n enumerable: false, // Not enumerable\n configurable: true\n })\n Object.defineProperty(mimeTypes[type], 'enabledPlugin', {\n 
value: new Proxy(plugins[pluginData.name], {}), // Prevent circular references\n writable: false,\n enumerable: false, // Important: `JSON.stringify(navigator.plugins)`\n configurable: true\n })\n })\n }\n\n const patchNavigator = (name, value) =>\n utils.replaceProperty(Object.getPrototypeOf(navigator), name, {\n get() {\n return value\n }\n })\n\n patchNavigator('mimeTypes', mimeTypes)\n patchNavigator('plugins', plugins)\n\n // All done\n }",_args:[{fns:{generateMimeTypeArray:"(utils, fns) => mimeTypesData => {\n return fns.generateMagicArray(utils, fns)(\n mimeTypesData,\n MimeTypeArray.prototype,\n MimeType.prototype,\n 'type'\n )\n}",generatePluginArray:"(utils, fns) => pluginsData => {\n return fns.generateMagicArray(utils, fns)(\n pluginsData,\n PluginArray.prototype,\n Plugin.prototype,\n 'name'\n )\n}",generateMagicArray:"(utils, fns) =>\n function(\n dataArray = [],\n proto = MimeTypeArray.prototype,\n itemProto = MimeType.prototype,\n itemMainProp = 'type'\n ) {\n // Quick helper to set props with the same descriptors vanilla is using\n const defineProp = (obj, prop, value) =>\n Object.defineProperty(obj, prop, {\n value,\n writable: false,\n enumerable: false, // Important for mimeTypes & plugins: `JSON.stringify(navigator.mimeTypes)`\n configurable: true\n })\n\n // Loop over our fake data and construct items\n const makeItem = data => {\n const item = {}\n for (const prop of Object.keys(data)) {\n if (prop.startsWith('__')) {\n continue\n }\n defineProp(item, prop, data[prop])\n }\n return patchItem(item, data)\n }\n\n const patchItem = (item, data) => {\n let descriptor = Object.getOwnPropertyDescriptors(item)\n\n // Special case: Plugins have a magic length property which is not enumerable\n // e.g. 
`navigator.plugins[i].length` should always be the length of the assigned mimeTypes\n if (itemProto === Plugin.prototype) {\n descriptor = {\n ...descriptor,\n length: {\n value: data.__mimeTypes.length,\n writable: false,\n enumerable: false,\n configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length`\n }\n }\n }\n\n // We need to spoof a specific `MimeType` or `Plugin` object\n const obj = Object.create(itemProto, descriptor)\n\n // Virtually all property keys are not enumerable in vanilla\n const blacklist = [...Object.keys(data), 'length', 'enabledPlugin']\n return new Proxy(obj, {\n ownKeys(target) {\n return Reflect.ownKeys(target).filter(k => !blacklist.includes(k))\n },\n getOwnPropertyDescriptor(target, prop) {\n if (blacklist.includes(prop)) {\n return undefined\n }\n return Reflect.getOwnPropertyDescriptor(target, prop)\n }\n })\n }\n\n const magicArray = []\n\n // Loop through our fake data and use that to create convincing entities\n dataArray.forEach(data => {\n magicArray.push(makeItem(data))\n })\n\n // Add direct property access based on types (e.g. 
`obj['application/pdf']`) afterwards\n magicArray.forEach(entry => {\n defineProp(magicArray, entry[itemMainProp], entry)\n })\n\n // This is the best way to fake the type to make sure this is false: `Array.isArray(navigator.mimeTypes)`\n const magicArrayObj = Object.create(proto, {\n ...Object.getOwnPropertyDescriptors(magicArray),\n\n // There's one ugly quirk we unfortunately need to take care of:\n // The `MimeTypeArray` prototype has an enumerable `length` property,\n // but headful Chrome will still skip it when running `Object.getOwnPropertyNames(navigator.mimeTypes)`.\n // To strip it we need to make it first `configurable` and can then overlay a Proxy with an `ownKeys` trap.\n length: {\n value: magicArray.length,\n writable: false,\n enumerable: false,\n configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length`\n }\n })\n\n // Generate our functional function mocks :-)\n const functionMocks = fns.generateFunctionMocks(utils)(\n proto,\n itemMainProp,\n magicArray\n )\n\n // We need to overlay our custom object with a JS Proxy\n const magicArrayObjProxy = new Proxy(magicArrayObj, {\n get(target, key = '') {\n // Redirect function calls to our custom proxied versions mocking the vanilla behavior\n if (key === 'item') {\n return functionMocks.item\n }\n if (key === 'namedItem') {\n return functionMocks.namedItem\n }\n if (proto === PluginArray.prototype && key === 'refresh') {\n return functionMocks.refresh\n }\n // Everything else can pass through as normal\n return utils.cache.Reflect.get(...arguments)\n },\n ownKeys(target) {\n // There are a couple of quirks where the original property demonstrates \"magical\" behavior that makes no sense\n // This can be witnessed when calling `Object.getOwnPropertyNames(navigator.mimeTypes)` and the absense of `length`\n // My guess is that it has to do with the recent change of not allowing data enumeration and this being implemented weirdly\n // For that reason we just completely 
fake the available property names based on our data to match what regular Chrome is doing\n // Specific issues when not patching this: `length` property is available, direct `types` props (e.g. `obj['application/pdf']`) are missing\n const keys = []\n const typeProps = magicArray.map(mt => mt[itemMainProp])\n typeProps.forEach((_, i) => keys.push(`${i}`))\n typeProps.forEach(propName => keys.push(propName))\n return keys\n },\n getOwnPropertyDescriptor(target, prop) {\n if (prop === 'length') {\n return undefined\n }\n return Reflect.getOwnPropertyDescriptor(target, prop)\n }\n })\n\n return magicArrayObjProxy\n }",generateFunctionMocks:"utils => (\n proto,\n itemMainProp,\n dataArray\n) => ({\n /** Returns the MimeType object with the specified index. */\n item: utils.createProxy(proto.item, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'item' on '${\n proto[Symbol.toStringTag]\n }': 1 argument required, but only 0 present.`\n )\n }\n // Special behavior alert:\n // - Vanilla tries to cast strings to Numbers (only integers!) and use them as property index lookup\n // - If anything else than an integer (including as string) is provided it will return the first entry\n const isInteger = args[0] && Number.isInteger(Number(args[0])) // Cast potential string to number first, then check for integer\n // Note: Vanilla never returns `undefined`\n return (isInteger ? dataArray[Number(args[0])] : dataArray[0]) || null\n }\n }),\n /** Returns the MimeType object with the specified name. */\n namedItem: utils.createProxy(proto.namedItem, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'namedItem' on '${\n proto[Symbol.toStringTag]\n }': 1 argument required, but only 0 present.`\n )\n }\n return dataArray.find(mt => mt[itemMainProp] === args[0]) || null // Not `undefined`!\n }\n }),\n /** Does nothing and shall return nothing */\n refresh: proto.refresh\n ? 
utils.createProxy(proto.refresh, {\n apply(target, ctx, args) {\n return undefined\n }\n })\n : undefined\n})"},data:{mimeTypes:[{type:"application/pdf",suffixes:"pdf",description:"",__pluginName:"Chrome PDF Viewer"},{type:"application/x-google-chrome-pdf",suffixes:"pdf",description:"Portable Document Format",__pluginName:"Chrome PDF Plugin"},{type:"application/x-nacl",suffixes:"",description:"Native Client Executable",__pluginName:"Native Client"},{type:"application/x-pnacl",suffixes:"",description:"Portable Native Client Executable",__pluginName:"Native Client"}],plugins:[{name:"Chrome PDF Plugin",filename:"internal-pdf-viewer",description:"Portable Document Format",__mimeTypes:["application/x-google-chrome-pdf"]},{name:"Chrome PDF Viewer",filename:"mhjfbmdgcfjbbpaeojofohoefgiehjai",description:"",__mimeTypes:["application/pdf"]},{name:"Native Client",filename:"internal-nacl-plugin",description:"",__mimeTypes:["application/x-nacl","application/x-pnacl"]}]}}]}),delete Object.getPrototypeOf(navigator).webdriver,(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a 
blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:"(utils, opts) => {\n const getParameterProxyHandler = {\n apply: function(target, ctx, args) {\n const param = (args || [])[0]\n // UNMASKED_VENDOR_WEBGL\n if (param === 37445) {\n return opts.vendor || 'Intel Inc.' 
// default in headless: Google Inc.\n }\n // UNMASKED_RENDERER_WEBGL\n if (param === 37446) {\n return opts.renderer || 'Intel Iris OpenGL Engine' // default in headless: Google SwiftShader\n }\n return utils.cache.Reflect.apply(target, ctx, args)\n }\n }\n\n // There's more than one WebGL rendering context\n // https://developer.mozilla.org/en-US/docs/Web/API/WebGL2RenderingContext#Browser_compatibility\n // To find out the original values here: Object.getOwnPropertyDescriptors(WebGLRenderingContext.prototype.getParameter)\n const addProxy = (obj, propName) => {\n utils.replaceWithProxy(obj, propName, getParameterProxyHandler)\n }\n // For whatever weird reason loops don't play nice with Object.defineProperty, here's the next best thing:\n addProxy(WebGLRenderingContext.prototype, 'getParameter')\n addProxy(WebGL2RenderingContext.prototype, 'getParameter')\n }",_args:[{}]}),(()=>{try{if(window.outerWidth&&window.outerHeight)return;const n=85;window.outerWidth=window.innerWidth,window.outerHeight=window.innerHeight+n}catch(n){}})(),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.preloadCache(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function() {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip 
lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = stack => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = stack => {\n const stackArr = stack.split('\\n')\n const anchor = `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache()\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n utils.preloadCache()\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function(target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n })\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n utils.preloadCache()\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n utils.preloadCache()\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath\n .split('.')\n .slice(0, -1)\n .join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof 
obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}"},_mainFunction:"(utils, opts) => {\n try {\n // Adds a contentWindow proxy to the provided iframe element\n const addContentWindowProxy = iframe => {\n const contentWindowProxy = {\n get(target, key) {\n // Now to the interesting part:\n // We actually make this thing behave like a regular iframe window,\n // by intercepting calls to e.g. `.self` and redirect it to the correct thing. 
:)\n // That makes it possible for these assertions to be correct:\n // iframe.contentWindow.self === window.top // must be false\n if (key === 'self') {\n return this\n }\n // iframe.contentWindow.frameElement === iframe // must be true\n if (key === 'frameElement') {\n return iframe\n }\n return Reflect.get(target, key)\n }\n }\n\n if (!iframe.contentWindow) {\n const proxy = new Proxy(window, contentWindowProxy)\n Object.defineProperty(iframe, 'contentWindow', {\n get() {\n return proxy\n },\n set(newValue) {\n return newValue // contentWindow is immutable\n },\n enumerable: true,\n configurable: false\n })\n }\n }\n\n // Handles iframe element creation, augments `srcdoc` property so we can intercept further\n const handleIframeCreation = (target, thisArg, args) => {\n const iframe = target.apply(thisArg, args)\n\n // We need to keep the originals around\n const _iframe = iframe\n const _srcdoc = _iframe.srcdoc\n\n // Add hook for the srcdoc property\n // We need to be very surgical here to not break other iframes by accident\n Object.defineProperty(iframe, 'srcdoc', {\n configurable: true, // Important, so we can reset this later\n get: function() {\n return _iframe.srcdoc\n },\n set: function(newValue) {\n addContentWindowProxy(this)\n // Reset property, the hook is only needed once\n Object.defineProperty(iframe, 'srcdoc', {\n configurable: false,\n writable: false,\n value: _srcdoc\n })\n _iframe.srcdoc = newValue\n }\n })\n return iframe\n }\n\n // Adds a hook to intercept iframe creation events\n const addIframeCreationSniffer = () => {\n /* global document */\n const createElementHandler = {\n // Make toString() native\n get(target, key) {\n return Reflect.get(target, key)\n },\n apply: function(target, thisArg, args) {\n const isIframe =\n args && args.length && `${args[0]}`.toLowerCase() === 'iframe'\n if (!isIframe) {\n // Everything as usual\n return target.apply(thisArg, args)\n } else {\n return handleIframeCreation(target, thisArg, args)\n }\n }\n 
class PyppeteerRequest(Request):
    """
    Scrapy ``Request`` subclass providing additional arguments
    """

    def __init__(self, url, callback=None, wait_until=None, wait_for=None, script=None, actions=None, proxy=None,
                 proxy_credential=None, sleep=None, timeout=None, ignore_resource_types=None, pretend=None, screenshot=None, meta=None,
                 *args, **kwargs):
        """
        Every pyppeteer option is stored both as an attribute of the request and
        under ``meta['pyppeteer']``. A value already present (and not ``None``)
        in ``meta['pyppeteer']`` takes precedence over the keyword argument.

        :param url: request url
        :param callback: callback
        :param wait_until: one of "load", "domcontentloaded", "networkidle0", "networkidle2".
            see https://miyakogi.github.io/pyppeteer/reference.html#pyppeteer.page.Page.goto, default is `domcontentloaded`
        :param wait_for: wait for some element to load, also supports dict
        :param script: script to execute
        :param actions: actions defined for execution of Page object
        :param proxy: use proxy for this time, like `http://x.x.x.x:x`
        :param proxy_credential: the proxy credential, like `{'username': 'xxxx', 'password': 'xxxx'}`
        :param sleep: time to sleep after loaded, override `GERAPY_PYPPETEER_SLEEP`
        :param timeout: load timeout, override `GERAPY_PYPPETEER_DOWNLOAD_TIMEOUT`
        :param ignore_resource_types: ignored resource types, override `GERAPY_PYPPETEER_IGNORE_RESOURCE_TYPES`
        :param pretend: pretend as normal browser, override `GERAPY_PYPPETEER_PRETEND`
        :param screenshot: screenshot options, see
            https://miyakogi.github.io/pyppeteer/_modules/pyppeteer/page.html#Page.screenshot,
            override `GERAPY_PYPPETEER_SCREENSHOT`
        :param meta: request meta; it is deep-copied, the caller's dict is never modified
        :param args: extra positional arguments passed to ``Request``
        :param kwargs: extra keyword arguments passed to ``Request``
        """
        # use meta info to save args; work on a copy so the caller's dict stays untouched
        meta = copy.deepcopy(meta) or {}
        # A present-but-falsy entry (e.g. ``{'pyppeteer': None}``) is replaced by a
        # fresh dict; ``setdefault`` would hand back the ``None`` and crash below.
        pyppeteer_meta = meta.get('pyppeteer') or {}
        meta['pyppeteer'] = pyppeteer_meta

        # Keyword-argument values, used only when meta does not already carry one.
        arguments = {
            'wait_until': wait_until or 'domcontentloaded',
            'wait_for': wait_for,
            'script': script,
            'actions': actions,
            'sleep': sleep,
            'proxy': proxy,
            'proxy_credential': proxy_credential,
            'pretend': pretend,
            'timeout': timeout,
            'screenshot': screenshot,
            'ignore_resource_types': ignore_resource_types,
        }
        for name, argument in arguments.items():
            stored = pyppeteer_meta.get(name)
            value = stored if stored is not None else argument
            setattr(self, name, value)
            pyppeteer_meta[name] = value

        super().__init__(url, callback, meta=meta, *args, **kwargs)
GERAPY_PYPPETEER_HEADLESS = True 18 | GERAPY_PYPPETEER_EXECUTABLE_PATH = None 19 | GERAPY_PYPPETEER_IGNORE_HTTPS_ERRORS = False 20 | GERAPY_PYPPETEER_SLOW_MO = None 21 | GERAPY_PYPPETEER_IGNORE_DEFAULT_ARGS = False 22 | GERAPY_PYPPETEER_HANDLE_SIGINT = True 23 | GERAPY_PYPPETEER_HANDLE_SIGTERM = True 24 | GERAPY_PYPPETEER_HANDLE_SIGHUP = True 25 | GERAPY_PYPPETEER_DUMPIO = False 26 | GERAPY_PYPPETEER_DEVTOOLS = False 27 | GERAPY_PYPPETEER_AUTO_CLOSE = True 28 | GERAPY_PYPPETEER_PRETEND = True 29 | 30 | # pyppeteer args 31 | GERAPY_PYPPETEER_DISABLE_EXTENSIONS = True 32 | GERAPY_PYPPETEER_HIDE_SCROLLBARS = True 33 | GERAPY_PYPPETEER_MUTE_AUDIO = True 34 | GERAPY_PYPPETEER_NO_SANDBOX = True 35 | GERAPY_PYPPETEER_DISABLE_SETUID_SANDBOX = True 36 | GERAPY_PYPPETEER_DISABLE_GPU = True 37 | 38 | # ignore resource types, ResourceType will be one of the following: ``document``, 39 | # ``stylesheet``, ``image``, ``media``, ``font``, ``script``, 40 | # ``texttrack``, ``xhr``, ``fetch``, ``eventsource``, ``websocket``, 41 | # ``manifest``, ``other``. 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Packaging script for gerapy-pyppeteer (build, install and ``upload`` command)."""

from os.path import join, isfile
from os import walk
import io
import os
import sys
from shutil import rmtree
from setuptools import find_packages, setup, Command


def read_file(filename):
    """Return the content of ``filename`` (decoded as UTF-8) without surrounding whitespace."""
    with io.open(filename, encoding='utf-8') as fp:
        return fp.read().strip()


def read_requirements(filename):
    """
    Return the requirement specifiers listed in ``filename``.

    Lines are stripped first, then blank lines and ``#`` comment lines are
    dropped, so neither empty strings nor indented comments end up in
    ``install_requires``.
    """
    stripped = (line.strip() for line in read_file(filename).splitlines())
    return [line for line in stripped if line and not line.startswith('#')]


NAME = 'gerapy-pyppeteer'
FOLDER = 'gerapy_pyppeteer'
DESCRIPTION = 'Pyppeteer Components for Scrapy & Gerapy'
URL = 'https://github.com/Gerapy/GerapyPyppeteer'
EMAIL = 'cqc@cuiqingcai.com'
AUTHOR = 'Germey'
REQUIRES_PYTHON = '>=3.6.0'
VERSION = None

# Resolve every project file relative to this script, not the current directory.
here = os.path.abspath(os.path.dirname(__file__))

REQUIRED = read_requirements(os.path.join(here, 'requirements.txt'))

try:
    with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
        long_description = '\n' + f.read()
except FileNotFoundError:
    # Fall back to the short description when the README is not shipped.
    long_description = DESCRIPTION

about = {}
if not VERSION:
    # Load ``__version__`` by executing the package's own version module.
    with io.open(os.path.join(here, FOLDER, '__version__.py'), encoding='utf-8') as f:
        exec(f.read(), about)
else:
    about['__version__'] = VERSION


def package_files(directories):
    """
    Collect file paths, each prefixed with ``..``, for the given entries.

    :param directories: iterable of file or directory paths; a file is added
        directly, a directory is walked recursively
    :return: list of collected paths
    """
    paths = []
    for item in directories:
        if isfile(item):
            paths.append(join('..', item))
            continue
        # ``_dirnames`` is deliberately not called ``directories`` so the
        # parameter being iterated is never rebound.
        for path, _dirnames, filenames in walk(item):
            paths.extend(join('..', path, filename) for filename in filenames)
    return paths


class UploadCommand(Command):
    """``setup.py upload``: build the distributions, upload them with twine and push a git tag."""

    description = 'Build and publish the package.'
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print('\033[1m{0}\033[0m'.format(s))

    @staticmethod
    def execute_or_exit(command):
        """Run ``command`` via ``os.system`` and exit with status 1 if it reports a failure."""
        if os.system(command) != 0:
            sys.exit(1)

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        """Run the release steps in order; stop at the first step that fails."""
        try:
            self.status('Removing previous builds…')
            rmtree(os.path.join(here, 'dist'))
        except OSError:
            # Best effort: there is nothing to remove on a first build.
            pass

        self.status('Building Source and Wheel (universal) distribution…')
        self.execute_or_exit('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))

        # Never upload after a failed build, and never tag after a failed upload.
        self.status('Uploading the package to PyPI via Twine…')
        self.execute_or_exit('twine upload dist/*')

        self.status('Pushing git tags…')
        self.execute_or_exit('git tag v{0}'.format(about['__version__']))
        self.execute_or_exit('git push --tags')

        sys.exit()


setup(
    name=NAME,
    version=about['__version__'],
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author=AUTHOR,
    author_email=EMAIL,
    python_requires=REQUIRES_PYTHON,
    url=URL,
    packages=find_packages(exclude=('tests',)),
    install_requires=REQUIRED,
    include_package_data=True,
    license='MIT',
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy'
    ],
    # $ setup.py upload support.
    cmdclass={
        'upload': UploadCommand,
    },
)